llvmpipe: eliminate last usage of step array in rast_tmp.h

For 16 and 64 pixel levels, calculate a mask which is linear in x and
y (i.e. not in the swizzle layout).

When iterating over full and partial masks, figure out position by
manipulating the bit number set in the mask, rather than relying on
position arrays.

Similarly, calculate the lower-level c values from dcdx, dcdy and the
position rather than relying on the step array.
This commit is contained in:
Keith Whitwell 2010-08-15 17:24:54 +01:00
parent ee0d1c29ee
commit 4c0641454b
2 changed files with 57 additions and 16 deletions

View file

@ -144,6 +144,35 @@ build_mask(int c, int dcdx, int dcdy)
return mask;
}
static INLINE unsigned
build_mask_linear(int c, int dcdx, int dcdy)
{
int mask = 0;
int c0 = c;
int c1 = c0 + dcdy;
int c2 = c1 + dcdy;
int c3 = c2 + dcdy;
mask |= ((c0 + 0 * dcdx) >> 31) & (1 << 0);
mask |= ((c0 + 1 * dcdx) >> 31) & (1 << 1);
mask |= ((c0 + 2 * dcdx) >> 31) & (1 << 2);
mask |= ((c0 + 3 * dcdx) >> 31) & (1 << 3);
mask |= ((c1 + 0 * dcdx) >> 31) & (1 << 4);
mask |= ((c1 + 1 * dcdx) >> 31) & (1 << 5);
mask |= ((c1 + 2 * dcdx) >> 31) & (1 << 6);
mask |= ((c1 + 3 * dcdx) >> 31) & (1 << 7);
mask |= ((c2 + 0 * dcdx) >> 31) & (1 << 8);
mask |= ((c2 + 1 * dcdx) >> 31) & (1 << 9);
mask |= ((c2 + 2 * dcdx) >> 31) & (1 << 10);
mask |= ((c2 + 3 * dcdx) >> 31) & (1 << 11);
mask |= ((c3 + 0 * dcdx) >> 31) & (1 << 12);
mask |= ((c3 + 1 * dcdx) >> 31) & (1 << 13);
mask |= ((c3 + 2 * dcdx) >> 31) & (1 << 14);
mask |= ((c3 + 3 * dcdx) >> 31) & (1 << 15);
return mask;
}
#define TAG(x) x##_1

View file

@ -84,8 +84,8 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
const int cox = c[j] + plane[j].eo * 4;
const int cio = c[j] + plane[j].ei * 4 - 1;
outmask |= build_mask(cox, dcdx, dcdy);
partmask |= build_mask(cio, dcdx, dcdy);
outmask |= build_mask_linear(cox, dcdx, dcdy);
partmask |= build_mask_linear(cio, dcdx, dcdy);
}
if (outmask == 0xffff)
@ -106,15 +106,19 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
*/
while (partial_mask) {
int i = ffs(partial_mask) - 1;
int px = x + pos_table4[i][0];
int py = y + pos_table4[i][1];
int ix = (i & 3) * 4;
int iy = (i >> 2) * 4;
int px = x + ix;
int py = y + iy;
int cx[NR_PLANES];
for (j = 0; j < NR_PLANES; j++)
cx[j] = c[j] + plane[j].step[i] * 4;
partial_mask &= ~(1 << i);
for (j = 0; j < NR_PLANES; j++)
cx[j] = (c[j]
- plane[j].dcdx * ix
+ plane[j].dcdy * iy);
TAG(do_block_4)(task, tri, plane, px, py, cx);
}
@ -122,8 +126,10 @@ TAG(do_block_16)(struct lp_rasterizer_task *task,
*/
while (inmask) {
int i = ffs(inmask) - 1;
int px = x + pos_table4[i][0];
int py = y + pos_table4[i][1];
int ix = (i & 3) * 4;
int iy = (i >> 2) * 4;
int px = x + ix;
int py = y + iy;
inmask &= ~(1 << i);
@ -169,8 +175,8 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
const int cox = c[j] + plane[j].eo * 16;
const int cio = c[j] + plane[j].ei * 16 - 1;
outmask |= build_mask(cox, dcdx, dcdy);
partmask |= build_mask(cio, dcdx, dcdy);
outmask |= build_mask_linear(cox, dcdx, dcdy);
partmask |= build_mask_linear(cio, dcdx, dcdy);
}
if (outmask == 0xffff)
@ -191,12 +197,16 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
*/
while (partial_mask) {
int i = ffs(partial_mask) - 1;
int px = x + pos_table16[i][0];
int py = y + pos_table16[i][1];
int ix = (i & 3) * 16;
int iy = (i >> 2) * 16;
int px = x + ix;
int py = y + iy;
int cx[NR_PLANES];
for (j = 0; j < NR_PLANES; j++)
cx[j] = c[j] + plane[j].step[i] * 16;
cx[j] = (c[j]
- plane[j].dcdx * ix
+ plane[j].dcdy * iy);
partial_mask &= ~(1 << i);
@ -208,8 +218,10 @@ TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
*/
while (inmask) {
int i = ffs(inmask) - 1;
int px = x + pos_table16[i][0];
int py = y + pos_table16[i][1];
int ix = (i & 3) * 16;
int iy = (i >> 2) * 16;
int px = x + ix;
int py = y + iy;
inmask &= ~(1 << i);