softpipe: optimized path for simple mipmap sampling

linear-mip-linear-repeat-POT sampling faspath, provides a very nice
speedup to apps that do this common type of texturing.

Test case: demos/terrain, turn fog off, turn texturing on.
Without patch: 12 fps
With patch: 20 fps.
This commit is contained in:
Keith Whitwell 2009-08-20 11:25:20 +01:00
parent d12bae9368
commit 4f409da345

View file

@ -921,8 +921,8 @@ sp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
}
else
{
unsigned x1 = (uflr + 1) & (xpot - 1);
unsigned y1 = (vflr + 1) & (ypot - 1);
unsigned x1 = (x0 + 1) & (xpot - 1);
unsigned y1 = (y0 + 1) & (ypot - 1);
get_texel_quad_2d_mt(tgsi_sampler, 0, level,
x0, y0, x1, y1, tx);
}
@ -1028,42 +1028,92 @@ sp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler
struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler);
const struct pipe_texture *texture = samp->texture;
const struct pipe_sampler_state *sampler = samp->sampler;
int level0, level1;
int level0;
float lambda;
lambda = compute_lambda(texture, sampler, s, t, p, lodbias);
level0 = (int)lambda;
level1 = level0 + 1;
if (lambda < 0.0) {
samp->level = 0;
sp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
s, t, p, lodbias, rgba );
s, t, p, 0, rgba );
}
else if (level0 >= texture->last_level) {
samp->level = texture->last_level;
sp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
s, t, p, lodbias, rgba );
s, t, p, 0, rgba );
}
else {
float rgba0[4][4];
float rgba1[4][4];
int c,j;
float levelBlend = lambda - level0; /* blending weight between levels */
unsigned xpot = 1 << (samp->xpot - level0);
unsigned ypot = 1 << (samp->ypot - level0);
unsigned xpot1 = 1 << (samp->xpot - (level0+1));
unsigned ypot1 = 1 << (samp->ypot - (level0+1));
unsigned j;
samp->level = level0;
sp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
s, t, p, lodbias, rgba0 );
for (j = 0; j < QUAD_SIZE; j++) {
int c;
float u = s[j] * xpot - 0.5F;
float v = t[j] * ypot - 0.5F;
samp->level++;
sp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
s, t, p, lodbias, rgba1 );
int uflr = util_ifloor(u);
int vflr = util_ifloor(v);
for (c = 0; c < 4; c++)
for (j = 0; j < 4; j++)
rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
}
float xw = u - (float)uflr;
float yw = v - (float)vflr;
int x0 = uflr & (xpot - 1);
int y0 = vflr & (ypot - 1);
const float *tx0[4];
const float *tx1[4];
if (x0 % TILE_SIZE != TILE_SIZE-1 &&
y0 % TILE_SIZE != TILE_SIZE-1)
{
get_texel_quad_2d(tgsi_sampler, 0, level0, x0, y0, tx0);
}
else
{
unsigned x1 = (x0 + 1) & (xpot - 1);
unsigned y1 = (y0 + 1) & (ypot - 1);
get_texel_quad_2d_mt(tgsi_sampler, 0, level0,
x0, y0, x1, y1, tx0);
}
x0 /= 2;
y0 /= 2;
/* also need to adjust xw, yw?? */
if (x0 % TILE_SIZE != TILE_SIZE-1 &&
y0 % TILE_SIZE != TILE_SIZE-1)
{
get_texel_quad_2d(tgsi_sampler, 0, level0+1, x0, y0, tx1);
}
else
{
unsigned x1 = (x0 + 1) & (xpot1 - 1);
unsigned y1 = (y0 + 1) & (ypot1 - 1);
get_texel_quad_2d_mt(tgsi_sampler, 0, level0+1,
x0, y0, x1, y1, tx1);
}
/* interpolate R, G, B, A */
for (c = 0; c < 4; c++) {
float rgba0 = lerp_2d(xw, yw,
tx0[0][c], tx0[1][c],
tx0[2][c], tx0[3][c]);
float rgba1 = lerp_2d(xw, yw,
tx1[0][c], tx1[1][c],
tx1[2][c], tx1[3][c]);
rgba[c][j] = lerp(levelBlend, rgba0, rgba1);
}
}
}
}
/**
@ -1567,10 +1617,7 @@ sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler,
if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
switch (sampler->min_img_filter) {
case PIPE_TEX_FILTER_LINEAR:
/* This one not working yet:
*/
if (0)
tgsi_sampler->get_samples = sp_get_samples_2d_linear_mip_linear_repeat_POT;
tgsi_sampler->get_samples = sp_get_samples_2d_linear_mip_linear_repeat_POT;
break;
default:
break;