mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-18 21:20:29 +01:00
llvmpipe/linear: refactor linear samplers into templated code.
Before adding new copies of all of these for swapping start by refactoring into macro templated code. I avoided using inline functions because I want to test with opts turned down, and this will kill perf. Reviewed-by: Brian Paul <brianp@vmware.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24066>
This commit is contained in:
parent
0230179f8b
commit
e43804ba65
3 changed files with 229 additions and 291 deletions
|
|
@ -79,12 +79,27 @@ fixed16_approx(int x, int y, int tol)
|
|||
return y - tol <= x && x <= y + tol;
|
||||
}
|
||||
|
||||
/* set alpha channel of rgba value to 0xff. */
|
||||
static inline uint32_t
|
||||
rgbx(uint32_t src_val)
|
||||
{
|
||||
return src_val | 0xff000000;
|
||||
}
|
||||
|
||||
/* set alpha channel of 128-bit 4xrgba values to 0xff. */
|
||||
static inline __m128i
|
||||
rgbx_128(const __m128i src_val)
|
||||
{
|
||||
const __m128i mask = _mm_set1_epi32(0xff000000);
|
||||
__m128i bgrx = _mm_or_si128(src_val, mask);
|
||||
return bgrx;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unstretched blit of a bgra texture.
|
||||
*/
|
||||
static const uint32_t *
|
||||
fetch_bgra_memcpy(struct lp_linear_elem *elem)
|
||||
fetch_memcpy_bgra(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
|
|
@ -109,223 +124,6 @@ fetch_bgra_memcpy(struct lp_linear_elem *elem)
|
|||
return row;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Unstretched blit of a bgrx texture.
|
||||
*/
|
||||
static const uint32_t *
|
||||
fetch_bgrx_memcpy(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
const uint32_t *src_row =
|
||||
(const uint32_t *)((const uint8_t *)texture->base +
|
||||
(samp->t >> FIXED16_SHIFT) * texture->row_stride[0]);
|
||||
const int s = samp->s;
|
||||
const int width = samp->width;
|
||||
uint32_t *row = samp->row;
|
||||
|
||||
src_row = &src_row[s >> FIXED16_SHIFT];
|
||||
|
||||
for (int i = 0; i < width; i++) {
|
||||
row[i] = src_row[i] | 0xff000000;
|
||||
}
|
||||
|
||||
samp->t += samp->dtdy;
|
||||
return row;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Perform nearest filtered lookup of a row of texels. Texture lookup
|
||||
* is assumed to be axis aligned but with arbitrary scaling.
|
||||
*
|
||||
* Texture coordinate interpolation is performed in 16.16 fixed point,
|
||||
* not to be confused with the 1.15 format used by the interpolants.
|
||||
*
|
||||
* After 64 pixels (ie. in the next tile), the starting point will be
|
||||
* recalculated with floating point arithmetic.
|
||||
*/
|
||||
static const uint32_t *
|
||||
fetch_bgra_axis_aligned(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
const uint32_t *src_row =
|
||||
(const uint32_t *)((const uint8_t *)texture->base +
|
||||
(samp->t >> FIXED16_SHIFT) * texture->row_stride[0]);
|
||||
const int dsdx = samp->dsdx;
|
||||
const int width = samp->width;
|
||||
uint32_t *row = samp->row;
|
||||
int s = samp->s;
|
||||
|
||||
for (int i = 0; i < width; i++) {
|
||||
row[i] = src_row[s>>FIXED16_SHIFT];
|
||||
s += dsdx;
|
||||
}
|
||||
|
||||
samp->t += samp->dtdy;
|
||||
return row;
|
||||
}
|
||||
|
||||
|
||||
static const uint32_t *
|
||||
fetch_bgrx_axis_aligned(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
const uint32_t *src_row =
|
||||
(const uint32_t *)((const uint8_t *)texture->base +
|
||||
(samp->t >> FIXED16_SHIFT) * texture->row_stride[0]);
|
||||
const int dsdx = samp->dsdx;
|
||||
const int width = samp->width;
|
||||
uint32_t *row = samp->row;
|
||||
int s = samp->s;
|
||||
|
||||
for (int i = 0; i < width; i++) {
|
||||
row[i] = src_row[s>>FIXED16_SHIFT] | 0xff000000;
|
||||
s += dsdx;
|
||||
}
|
||||
|
||||
samp->t += samp->dtdy;
|
||||
return row;
|
||||
}
|
||||
|
||||
|
||||
/* Non-axis aligned, but no clamping or wrapping required
|
||||
*/
|
||||
static const uint32_t *
|
||||
fetch_bgra(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
const uint8_t *src = texture->base;
|
||||
const int stride = texture->row_stride[0];
|
||||
const int dsdx = samp->dsdx;
|
||||
const int dtdx = samp->dtdx;
|
||||
const int width = samp->width;
|
||||
uint32_t *row = samp->row;
|
||||
int s = samp->s;
|
||||
int t = samp->t;
|
||||
|
||||
for (int i = 0; i < width; i++) {
|
||||
const uint8_t *texel = (src +
|
||||
(t>>FIXED16_SHIFT) * stride +
|
||||
(s>>FIXED16_SHIFT) * 4);
|
||||
|
||||
row[i] = *(const uint32_t *)texel;
|
||||
|
||||
s += dsdx;
|
||||
t += dtdx;
|
||||
}
|
||||
|
||||
samp->s += samp->dsdy;
|
||||
samp->t += samp->dtdy;
|
||||
return row;
|
||||
}
|
||||
|
||||
|
||||
static const uint32_t *
|
||||
fetch_bgrx(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
const uint8_t *src = texture->base;
|
||||
const int stride = texture->row_stride[0];
|
||||
const int dsdx = samp->dsdx;
|
||||
const int dtdx = samp->dtdx;
|
||||
const int width = samp->width;
|
||||
uint32_t *row = samp->row;
|
||||
int s = samp->s;
|
||||
int t = samp->t;
|
||||
|
||||
for (int i = 0; i < width; i++) {
|
||||
const uint8_t *texel = (src +
|
||||
(t>>FIXED16_SHIFT) * stride +
|
||||
(s>>FIXED16_SHIFT) * 4);
|
||||
|
||||
row[i] = (*(const uint32_t *)texel) | 0xff000000;
|
||||
|
||||
s += dsdx;
|
||||
t += dtdx;
|
||||
}
|
||||
|
||||
samp->s += samp->dsdy;
|
||||
samp->t += samp->dtdy;
|
||||
return row;
|
||||
}
|
||||
|
||||
|
||||
/* Non-axis aligned, clamped.
|
||||
*/
|
||||
static const uint32_t *
|
||||
fetch_bgra_clamp(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
const uint8_t *src = texture->base;
|
||||
const int stride = texture->row_stride[0];
|
||||
const int tex_height = texture->height - 1;
|
||||
const int tex_width = texture->width - 1;
|
||||
const int dsdx = samp->dsdx;
|
||||
const int dtdx = samp->dtdx;
|
||||
const int width = samp->width;
|
||||
uint32_t *row = samp->row;
|
||||
int s = samp->s;
|
||||
int t = samp->t;
|
||||
|
||||
for (int i = 0; i < width; i++) {
|
||||
int ct = CLAMP(t>>FIXED16_SHIFT, 0, tex_height);
|
||||
int cs = CLAMP(s>>FIXED16_SHIFT, 0, tex_width);
|
||||
|
||||
const uint8_t *texel = src + ct * stride + cs * 4;
|
||||
|
||||
row[i] = *(const uint32_t *)texel;
|
||||
|
||||
s += dsdx;
|
||||
t += dtdx;
|
||||
}
|
||||
|
||||
samp->s += samp->dsdy;
|
||||
samp->t += samp->dtdy;
|
||||
return row;
|
||||
}
|
||||
|
||||
|
||||
static const uint32_t *
|
||||
fetch_bgrx_clamp(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
const uint8_t *src = texture->base;
|
||||
const int stride = texture->row_stride[0];
|
||||
const int tex_height = texture->height - 1;
|
||||
const int tex_width = texture->width - 1;
|
||||
const int dsdx = samp->dsdx;
|
||||
const int dtdx = samp->dtdx;
|
||||
const int width = samp->width;
|
||||
uint32_t *row = samp->row;
|
||||
int s = samp->s;
|
||||
int t = samp->t;
|
||||
|
||||
for (int i = 0; i < width; i++) {
|
||||
int ct = CLAMP(t>>FIXED16_SHIFT, 0, tex_height);
|
||||
int cs = CLAMP(s>>FIXED16_SHIFT, 0, tex_width);
|
||||
|
||||
const uint8_t *texel = src + ct * stride + cs * 4;
|
||||
|
||||
row[i] = (*(const uint32_t *)texel) | 0xff000000;
|
||||
|
||||
s += dsdx;
|
||||
t += dtdx;
|
||||
}
|
||||
|
||||
samp->s += samp->dsdy;
|
||||
samp->t += samp->dtdy;
|
||||
return row;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Fetch and stretch one row.
|
||||
*/
|
||||
|
|
@ -394,7 +192,7 @@ fetch_and_stretch_bgra_row(struct lp_linear_sampler *samp,
|
|||
* temporary or fetch sparsely.
|
||||
*/
|
||||
static const uint32_t *
|
||||
fetch_bgra_axis_aligned_linear(struct lp_linear_elem *elem)
|
||||
fetch_axis_aligned_linear_bgra(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const int width = samp->width;
|
||||
|
|
@ -431,7 +229,7 @@ fetch_bgra_axis_aligned_linear(struct lp_linear_elem *elem)
|
|||
* maximize.
|
||||
*/
|
||||
static const uint32_t *
|
||||
fetch_bgra_linear(struct lp_linear_elem *elem)
|
||||
fetch_linear_bgra(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
|
|
@ -485,7 +283,7 @@ fetch_bgra_linear(struct lp_linear_elem *elem)
|
|||
* maximize.
|
||||
*/
|
||||
static const uint32_t *
|
||||
fetch_bgra_clamp_linear(struct lp_linear_elem *elem)
|
||||
fetch_clamp_linear_bgra(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
|
|
@ -596,65 +394,16 @@ fetch_bgra_clamp_linear(struct lp_linear_elem *elem)
|
|||
return row;
|
||||
}
|
||||
|
||||
/* don't generate bgra 128-bits or memcpy ops they have their own path */
|
||||
#define FETCH_TYPE bgra
|
||||
#define OP
|
||||
#define NO_MEMCPY
|
||||
#include "lp_linear_sampler_tmp.h"
|
||||
|
||||
static const uint32_t *
|
||||
fetch_bgrx_axis_aligned_linear(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const __m128i mask = _mm_set1_epi32(0xff000000);
|
||||
uint32_t *dst_row = samp->row;
|
||||
const uint32_t *src_row = fetch_bgra_axis_aligned_linear(&samp->base);
|
||||
const int width = samp->width;
|
||||
|
||||
for (int i = 0; i < width; i += 4) {
|
||||
__m128i bgra = *(__m128i *)&src_row[i];
|
||||
__m128i bgrx = _mm_or_si128(bgra, mask);
|
||||
*(__m128i *)&dst_row[i] = bgrx;
|
||||
}
|
||||
|
||||
return dst_row;
|
||||
}
|
||||
|
||||
|
||||
static const uint32_t *
|
||||
fetch_bgrx_clamp_linear(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const __m128i mask = _mm_set1_epi32(0xff000000);
|
||||
uint32_t *row = samp->row;
|
||||
const int width = samp->width;
|
||||
|
||||
fetch_bgra_clamp_linear(&samp->base);
|
||||
|
||||
for (int i = 0; i < width; i += 4) {
|
||||
__m128i bgra = *(__m128i *)&row[i];
|
||||
__m128i bgrx = _mm_or_si128(bgra, mask);
|
||||
*(__m128i *)&row[i] = bgrx;
|
||||
}
|
||||
|
||||
return row;
|
||||
}
|
||||
|
||||
|
||||
static const uint32_t *
|
||||
fetch_bgrx_linear(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const __m128i mask = _mm_set1_epi32(0xff000000);
|
||||
uint32_t *row = samp->row;
|
||||
const int width = samp->width;
|
||||
|
||||
fetch_bgra_linear(&samp->base);
|
||||
|
||||
for (int i = 0; i < width; i += 4) {
|
||||
__m128i bgra = *(__m128i *)&row[i];
|
||||
__m128i bgrx = _mm_or_si128(bgra, mask);
|
||||
*(__m128i *)&row[i] = bgrx;
|
||||
}
|
||||
|
||||
return row;
|
||||
}
|
||||
|
||||
#define FETCH_TYPE bgrx
|
||||
#define OP rgbx
|
||||
#define OP128 rgbx_128
|
||||
#include "lp_linear_sampler_tmp.h"
|
||||
|
||||
static bool
|
||||
sampler_is_nearest(const struct lp_linear_sampler *samp,
|
||||
|
|
@ -875,23 +624,23 @@ lp_linear_init_sampler(struct lp_linear_sampler *samp,
|
|||
switch (sampler_state->texture_state.format) {
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
if (need_wrap)
|
||||
samp->base.fetch = fetch_bgra_clamp;
|
||||
samp->base.fetch = fetch_clamp_bgra;
|
||||
else if (!samp->axis_aligned)
|
||||
samp->base.fetch = fetch_bgra;
|
||||
else if (samp->dsdx != FIXED16_ONE) // TODO: could be relaxed
|
||||
samp->base.fetch = fetch_bgra_axis_aligned;
|
||||
samp->base.fetch = fetch_axis_aligned_bgra;
|
||||
else
|
||||
samp->base.fetch = fetch_bgra_memcpy;
|
||||
samp->base.fetch = fetch_memcpy_bgra;
|
||||
return true;
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
if (need_wrap)
|
||||
samp->base.fetch = fetch_bgrx_clamp;
|
||||
samp->base.fetch = fetch_clamp_bgrx;
|
||||
else if (!samp->axis_aligned)
|
||||
samp->base.fetch = fetch_bgrx;
|
||||
else if (samp->dsdx != FIXED16_ONE) // TODO: could be relaxed
|
||||
samp->base.fetch = fetch_bgrx_axis_aligned;
|
||||
samp->base.fetch = fetch_axis_aligned_bgrx;
|
||||
else
|
||||
samp->base.fetch = fetch_bgrx_memcpy;
|
||||
samp->base.fetch = fetch_memcpy_bgrx;
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
|
|
@ -906,19 +655,19 @@ lp_linear_init_sampler(struct lp_linear_sampler *samp,
|
|||
switch (sampler_state->texture_state.format) {
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
if (need_wrap)
|
||||
samp->base.fetch = fetch_bgra_clamp_linear;
|
||||
samp->base.fetch = fetch_clamp_linear_bgra;
|
||||
else if (!samp->axis_aligned)
|
||||
samp->base.fetch = fetch_bgra_linear;
|
||||
samp->base.fetch = fetch_linear_bgra;
|
||||
else
|
||||
samp->base.fetch = fetch_bgra_axis_aligned_linear;
|
||||
samp->base.fetch = fetch_axis_aligned_linear_bgra;
|
||||
return true;
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
if (need_wrap)
|
||||
samp->base.fetch = fetch_bgrx_clamp_linear;
|
||||
samp->base.fetch = fetch_clamp_linear_bgrx;
|
||||
else if (!samp->axis_aligned)
|
||||
samp->base.fetch = fetch_bgrx_linear;
|
||||
samp->base.fetch = fetch_linear_bgrx;
|
||||
else
|
||||
samp->base.fetch = fetch_bgrx_axis_aligned_linear;
|
||||
samp->base.fetch = fetch_axis_aligned_linear_bgrx;
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
|
|
|
|||
188
src/gallium/drivers/llvmpipe/lp_linear_sampler_tmp.h
Normal file
188
src/gallium/drivers/llvmpipe/lp_linear_sampler_tmp.h
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
/* sampler template functions */
|
||||
|
||||
#ifndef NO_MEMCPY
|
||||
/*
|
||||
* Unstretched blit of a bgrx texture.
|
||||
*/
|
||||
static const uint32_t *
|
||||
CONCAT2(fetch_memcpy_, FETCH_TYPE)(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
const uint32_t *src_row =
|
||||
(const uint32_t *)((const uint8_t *)texture->base +
|
||||
(samp->t >> FIXED16_SHIFT) * texture->row_stride[0]);
|
||||
const int s = samp->s;
|
||||
const int width = samp->width;
|
||||
uint32_t *row = samp->row;
|
||||
|
||||
src_row = &src_row[s >> FIXED16_SHIFT];
|
||||
|
||||
for (int i = 0; i < width; i++) {
|
||||
row[i] = OP(src_row[i]);
|
||||
}
|
||||
|
||||
samp->t += samp->dtdy;
|
||||
return row;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Perform nearest filtered lookup of a row of texels. Texture lookup
|
||||
* is assumed to be axis aligned but with arbitrary scaling.
|
||||
*
|
||||
* Texture coordinate interpolation is performed in 16.16 fixed point,
|
||||
* not to be confused with the 1.15 format used by the interpolants.
|
||||
*
|
||||
* After 64 pixels (ie. in the next tile), the starting point will be
|
||||
* recalculated with floating point arithmetic.
|
||||
*/
|
||||
static const uint32_t *
|
||||
CONCAT2(fetch_axis_aligned_, FETCH_TYPE)(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
const uint32_t *src_row =
|
||||
(const uint32_t *)((const uint8_t *)texture->base +
|
||||
(samp->t >> FIXED16_SHIFT) * texture->row_stride[0]);
|
||||
const int dsdx = samp->dsdx;
|
||||
const int width = samp->width;
|
||||
uint32_t *row = samp->row;
|
||||
int s = samp->s;
|
||||
|
||||
for (int i = 0; i < width; i++) {
|
||||
row[i] = OP(src_row[s>>FIXED16_SHIFT]);
|
||||
s += dsdx;
|
||||
}
|
||||
|
||||
samp->t += samp->dtdy;
|
||||
return row;
|
||||
}
|
||||
|
||||
/* Non-axis aligned, but no clamping or wrapping required
|
||||
*/
|
||||
static const uint32_t *
|
||||
CONCAT2(fetch_, FETCH_TYPE)(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
const uint8_t *src = texture->base;
|
||||
const int stride = texture->row_stride[0];
|
||||
const int dsdx = samp->dsdx;
|
||||
const int dtdx = samp->dtdx;
|
||||
const int width = samp->width;
|
||||
uint32_t *row = samp->row;
|
||||
int s = samp->s;
|
||||
int t = samp->t;
|
||||
|
||||
for (int i = 0; i < width; i++) {
|
||||
const uint8_t *texel = (src +
|
||||
(t>>FIXED16_SHIFT) * stride +
|
||||
(s>>FIXED16_SHIFT) * 4);
|
||||
|
||||
row[i] = OP(*(const uint32_t *)texel);
|
||||
|
||||
s += dsdx;
|
||||
t += dtdx;
|
||||
}
|
||||
|
||||
samp->s += samp->dsdy;
|
||||
samp->t += samp->dtdy;
|
||||
return row;
|
||||
}
|
||||
|
||||
/* Non-axis aligned, clamped.
|
||||
*/
|
||||
static const uint32_t *
|
||||
CONCAT2(fetch_clamp_, FETCH_TYPE)(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
const struct lp_jit_texture *texture = samp->texture;
|
||||
const uint8_t *src = texture->base;
|
||||
const int stride = texture->row_stride[0];
|
||||
const int tex_height = texture->height - 1;
|
||||
const int tex_width = texture->width - 1;
|
||||
const int dsdx = samp->dsdx;
|
||||
const int dtdx = samp->dtdx;
|
||||
const int width = samp->width;
|
||||
uint32_t *row = samp->row;
|
||||
int s = samp->s;
|
||||
int t = samp->t;
|
||||
|
||||
for (int i = 0; i < width; i++) {
|
||||
int ct = CLAMP(t>>FIXED16_SHIFT, 0, tex_height);
|
||||
int cs = CLAMP(s>>FIXED16_SHIFT, 0, tex_width);
|
||||
|
||||
const uint8_t *texel = src + ct * stride + cs * 4;
|
||||
|
||||
row[i] = OP(*(const uint32_t *)texel);
|
||||
|
||||
s += dsdx;
|
||||
t += dtdx;
|
||||
}
|
||||
|
||||
samp->s += samp->dsdy;
|
||||
samp->t += samp->dtdy;
|
||||
return row;
|
||||
}
|
||||
|
||||
#ifdef OP128
|
||||
static const uint32_t *
|
||||
CONCAT2(fetch_axis_aligned_linear_, FETCH_TYPE)(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
|
||||
uint32_t *dst_row = samp->row;
|
||||
const uint32_t *src_row = fetch_axis_aligned_linear_bgra(&samp->base);
|
||||
const int width = samp->width;
|
||||
|
||||
for (int i = 0; i < width; i += 4) {
|
||||
__m128i bgra = *(__m128i *)&src_row[i];
|
||||
__m128i rgba = OP128(bgra);
|
||||
*(__m128i *)&dst_row[i] = rgba;
|
||||
}
|
||||
|
||||
return dst_row;
|
||||
}
|
||||
|
||||
static const uint32_t *
|
||||
CONCAT2(fetch_clamp_linear_, FETCH_TYPE)(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
uint32_t *row = samp->row;
|
||||
const int width = samp->width;
|
||||
|
||||
fetch_clamp_linear_bgra(&samp->base);
|
||||
|
||||
for (int i = 0; i < width; i += 4) {
|
||||
__m128i bgra = *(__m128i *)&row[i];
|
||||
__m128i rgba = OP128(bgra);
|
||||
*(__m128i *)&row[i] = rgba;
|
||||
}
|
||||
|
||||
return row;
|
||||
}
|
||||
|
||||
static const uint32_t *
|
||||
CONCAT2(fetch_linear_, FETCH_TYPE)(struct lp_linear_elem *elem)
|
||||
{
|
||||
struct lp_linear_sampler *samp = (struct lp_linear_sampler *)elem;
|
||||
uint32_t *row = samp->row;
|
||||
const int width = samp->width;
|
||||
|
||||
fetch_linear_bgra(&samp->base);
|
||||
|
||||
for (int i = 0; i < width; i += 4) {
|
||||
__m128i bgra = *(__m128i *)&row[i];
|
||||
__m128i rgba = OP128(bgra);
|
||||
*(__m128i *)&row[i] = rgba;
|
||||
}
|
||||
|
||||
return row;
|
||||
}
|
||||
#endif
|
||||
|
||||
#undef OP
|
||||
#undef OP128
|
||||
#undef FETCH_TYPE
|
||||
#undef NO_MEMCPY
|
||||
|
|
@ -48,6 +48,7 @@ files_llvmpipe = files(
|
|||
'lp_linear_fastpath.c',
|
||||
'lp_linear_interp.c',
|
||||
'lp_linear_sampler.c',
|
||||
'lp_linear_sampler_tmp.h',
|
||||
'lp_memory.c',
|
||||
'lp_memory.h',
|
||||
'lp_perf.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue