llvmpipe: pass mask into fragment shader

Move the triangle edge-test (coverage) code back out of the generated shader into C for now; it will be code-generated separately later.

Shader now takes a mask parameter instead of C0/C1/C2/etc.

Shader does not currently use that parameter and rasterizes whole
pixel stamps always.
This commit is contained in:
Keith Whitwell 2010-06-17 21:19:09 +01:00 committed by José Fonseca
parent 3bd9aedbac
commit d4b64167b5
14 changed files with 899 additions and 788 deletions

View file

@ -103,10 +103,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
elem_types[LP_JIT_CTX_ALPHA_REF] = LLVMFloatType();
elem_types[LP_JIT_CTX_STENCIL_REF_FRONT] = LLVMInt32Type();
elem_types[LP_JIT_CTX_STENCIL_REF_BACK] = LLVMInt32Type();
elem_types[LP_JIT_CTX_SCISSOR_XMIN] = LLVMFloatType();
elem_types[LP_JIT_CTX_SCISSOR_YMIN] = LLVMFloatType();
elem_types[LP_JIT_CTX_SCISSOR_XMAX] = LLVMFloatType();
elem_types[LP_JIT_CTX_SCISSOR_YMAX] = LLVMFloatType();
elem_types[LP_JIT_CTX_BLEND_COLOR] = LLVMPointerType(LLVMInt8Type(), 0);
elem_types[LP_JIT_CTX_TEXTURES] = LLVMArrayType(texture_type,
PIPE_MAX_SAMPLERS);
@ -125,18 +121,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, stencil_ref_back,
screen->target, context_type,
LP_JIT_CTX_STENCIL_REF_BACK);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmin,
screen->target, context_type,
LP_JIT_CTX_SCISSOR_XMIN);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymin,
screen->target, context_type,
LP_JIT_CTX_SCISSOR_YMIN);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_xmax,
screen->target, context_type,
LP_JIT_CTX_SCISSOR_XMAX);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, scissor_ymax,
screen->target, context_type,
LP_JIT_CTX_SCISSOR_YMAX);
LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
screen->target, context_type,
LP_JIT_CTX_BLEND_COLOR);

View file

@ -89,9 +89,6 @@ struct lp_jit_context
uint32_t stencil_ref_front, stencil_ref_back;
/** floats, not ints */
float scissor_xmin, scissor_ymin, scissor_xmax, scissor_ymax;
/* FIXME: store (also?) in floats */
uint8_t *blend_color;
@ -108,10 +105,6 @@ enum {
LP_JIT_CTX_ALPHA_REF,
LP_JIT_CTX_STENCIL_REF_FRONT,
LP_JIT_CTX_STENCIL_REF_BACK,
LP_JIT_CTX_SCISSOR_XMIN,
LP_JIT_CTX_SCISSOR_YMIN,
LP_JIT_CTX_SCISSOR_XMAX,
LP_JIT_CTX_SCISSOR_YMAX,
LP_JIT_CTX_BLEND_COLOR,
LP_JIT_CTX_TEXTURES,
LP_JIT_CTX_COUNT
@ -130,18 +123,6 @@ enum {
#define lp_jit_context_stencil_ref_back_value(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_STENCIL_REF_BACK, "stencil_ref_back")
#define lp_jit_context_scissor_xmin_value(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMIN, "scissor_xmin")
#define lp_jit_context_scissor_ymin_value(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMIN, "scissor_ymin")
#define lp_jit_context_scissor_xmax_value(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_XMAX, "scissor_xmax")
#define lp_jit_context_scissor_ymax_value(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_SCISSOR_YMAX, "scissor_ymax")
#define lp_jit_context_blend_color(_builder, _ptr) \
lp_build_struct_get(_builder, _ptr, LP_JIT_CTX_BLEND_COLOR, "blend_color")
@ -160,12 +141,7 @@ typedef void
const void *dady,
uint8_t **color,
void *depth,
const int32_t c1,
const int32_t c2,
const int32_t c3,
const int32_t *step1,
const int32_t *step2,
const int32_t *step3,
uint32_t mask,
uint32_t *counter);

View file

@ -46,10 +46,10 @@ lp_print_counters(void)
{
if (LP_DEBUG & DEBUG_COUNTERS) {
unsigned total_64, total_16, total_4;
float p1, p2, p3;
float p1, p2, p3, p4;
debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris);
debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris);
debug_printf("llvmpipe: nr_triangles: %9u\n", lp_count.nr_tris);
debug_printf("llvmpipe: nr_culled_triangles: %9u\n", lp_count.nr_culled_tris);
total_64 = (lp_count.nr_empty_64 +
lp_count.nr_fully_covered_64 +
@ -58,10 +58,13 @@ lp_print_counters(void)
p1 = 100.0 * (float) lp_count.nr_empty_64 / (float) total_64;
p2 = 100.0 * (float) lp_count.nr_fully_covered_64 / (float) total_64;
p3 = 100.0 * (float) lp_count.nr_partially_covered_64 / (float) total_64;
p4 = 100.0 * (float) lp_count.nr_shade_opaque_64 / (float) total_64;
debug_printf("llvmpipe: nr_empty_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64);
debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64);
debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64);
debug_printf("llvmpipe: nr_64x64: %9u\n", total_64);
debug_printf("llvmpipe: nr_fully_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_64, p2, total_64);
debug_printf("llvmpipe: nr_shade_opaque_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_shade_opaque_64, p4, total_64);
debug_printf("llvmpipe: nr_partially_covered_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_64, p3, total_64);
debug_printf("llvmpipe: nr_empty_64x64: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_64, p1, total_64);
total_16 = (lp_count.nr_empty_16 +
lp_count.nr_fully_covered_16 +
@ -71,25 +74,27 @@ lp_print_counters(void)
p2 = 100.0 * (float) lp_count.nr_fully_covered_16 / (float) total_16;
p3 = 100.0 * (float) lp_count.nr_partially_covered_16 / (float) total_16;
debug_printf("llvmpipe: nr_empty_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16);
debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16);
debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%2.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16);
debug_printf("llvmpipe: nr_16x16: %9u\n", total_16);
debug_printf("llvmpipe: nr_fully_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_fully_covered_16, p2, total_16);
debug_printf("llvmpipe: nr_partially_covered_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_partially_covered_16, p3, total_16);
debug_printf("llvmpipe: nr_empty_16x16: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_16, p1, total_16);
total_4 = (lp_count.nr_empty_4 + lp_count.nr_non_empty_4);
p1 = 100.0 * (float) lp_count.nr_empty_4 / (float) total_4;
p2 = 100.0 * (float) lp_count.nr_non_empty_4 / (float) total_4;
debug_printf("llvmpipe: nr_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4);
debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%2.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4);
debug_printf("llvmpipe: nr_4x4: %9u\n", total_4);
debug_printf("llvmpipe: nr_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_empty_4, p1, total_4);
debug_printf("llvmpipe: nr_non_empty_4x4: %9u (%3.0f%% of %u)\n", lp_count.nr_non_empty_4, p2, total_4);
debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear);
debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load);
debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store);
debug_printf("llvmpipe: nr_color_tile_clear: %9u\n", lp_count.nr_color_tile_clear);
debug_printf("llvmpipe: nr_color_tile_load: %9u\n", lp_count.nr_color_tile_load);
debug_printf("llvmpipe: nr_color_tile_store: %9u\n", lp_count.nr_color_tile_store);
debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles);
debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0);
debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles);
debug_printf("llvmpipe: nr_llvm_compiles: %u\n", lp_count.nr_llvm_compiles);
debug_printf("llvmpipe: total LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0);
debug_printf("llvmpipe: average LLVM compile time: %.2f sec\n", lp_count.llvm_compile_time / 1000000.0 / lp_count.nr_llvm_compiles);
}
}

View file

@ -44,6 +44,7 @@ struct lp_counters
unsigned nr_empty_64;
unsigned nr_fully_covered_64;
unsigned nr_partially_covered_64;
unsigned nr_shade_opaque_64;
unsigned nr_empty_16;
unsigned nr_fully_covered_16;
unsigned nr_partially_covered_16;

View file

@ -28,6 +28,7 @@
#include <limits.h>
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_rect.h"
#include "util/u_surface.h"
#include "lp_scene_queue.h"
@ -136,7 +137,6 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
struct lp_rasterizer *rast = task->rast;
struct lp_scene *scene = rast->curr_scene;
enum lp_texture_usage usage;
unsigned buf;
LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
@ -146,24 +146,8 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
task->x = x;
task->y = y;
if (scene->has_color_clear)
usage = LP_TEX_USAGE_WRITE_ALL;
else
usage = LP_TEX_USAGE_READ_WRITE;
/* get pointers to color tile(s) */
for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
struct pipe_surface *cbuf = rast->curr_scene->fb.cbufs[buf];
struct llvmpipe_resource *lpt;
assert(cbuf);
lpt = llvmpipe_resource(cbuf->texture);
task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
cbuf->face + cbuf->zslice,
cbuf->level,
usage,
x, y);
assert(task->color_tiles[buf]);
}
/* reset pointers to color tile(s) */
memset(task->color_tiles, 0, sizeof(task->color_tiles));
/* get pointer to depth/stencil tile */
{
@ -222,7 +206,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
clear_color[2] == clear_color[3]) {
/* clear to grayscale value {x, x, x, x} */
for (i = 0; i < rast->state.nr_cbufs; i++) {
uint8_t *ptr = task->color_tiles[i];
uint8_t *ptr =
lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
}
}
@ -234,7 +219,8 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
*/
const unsigned chunk = TILE_SIZE / 4;
for (i = 0; i < rast->state.nr_cbufs; i++) {
uint8_t *c = task->color_tiles[i];
uint8_t *c =
lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
unsigned j;
for (j = 0; j < 4 * TILE_SIZE; j++) {
@ -378,8 +364,8 @@ lp_rast_load_color(struct lp_rasterizer_task *task,
* This is a bin command which is stored in all bins.
*/
void
lp_rast_store_color( struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
lp_rast_store_linear_color( struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
struct lp_rasterizer *rast = task->rast;
struct lp_scene *scene = rast->curr_scene;
@ -448,30 +434,54 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
/* run shader on 4x4 block */
variant->jit_function[RAST_WHOLE]( &state->jit_context,
tile_x + x, tile_y + y,
inputs->facing,
inputs->a0,
inputs->dadx,
inputs->dady,
color,
depth,
INT_MIN, INT_MIN, INT_MIN,
NULL, NULL, NULL, &task->vis_counter);
tile_x + x, tile_y + y,
inputs->facing,
inputs->a0,
inputs->dadx,
inputs->dady,
color,
depth,
0xffff,
&task->vis_counter);
}
}
}
/**
* Compute shading for a 4x4 block of pixels.
* Run the shader on all blocks in a tile. This is used when a tile is
* completely contained inside a triangle, and the shader is opaque.
* This is a bin command called during bin processing.
*/
void
lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task,
                          const union lp_rast_cmd_arg arg)
{
   unsigned buf = 0;

   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);

   /* Request every color tile with WRITE_ALL usage before shading;
    * this will prevent converting the layout from tiled to linear.
    * The returned pointers are not needed here.
    */
   while (buf < task->rast->state.nr_cbufs) {
      (void) lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_WRITE_ALL);
      buf++;
   }

   /* The actual shading is shared with the non-opaque whole-tile path. */
   lp_rast_shade_tile(task, arg);
}
/**
* Compute shading for a 4x4 block of pixels inside a triangle.
* This is a bin command called during bin processing.
* \param x X position of quad in window coords
* \param y Y position of quad in window coords
*/
void lp_rast_shade_quads( struct lp_rasterizer_task *task,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y,
int32_t c1, int32_t c2, int32_t c3)
void
lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y,
unsigned mask)
{
const struct lp_rast_state *state = task->current_state;
struct lp_fragment_shader_variant *variant = state->variant;
@ -501,27 +511,21 @@ void lp_rast_shade_quads( struct lp_rasterizer_task *task,
assert(lp_check_alignment(state->jit_context.blend_color, 16));
assert(lp_check_alignment(inputs->step[0], 16));
assert(lp_check_alignment(inputs->step[1], 16));
assert(lp_check_alignment(inputs->step[2], 16));
/* run shader on 4x4 block */
variant->jit_function[RAST_EDGE_TEST]( &state->jit_context,
x, y,
inputs->facing,
inputs->a0,
inputs->dadx,
inputs->dady,
color,
depth,
c1, c2, c3,
inputs->step[0],
inputs->step[1],
inputs->step[2],
&task->vis_counter);
variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
x, y,
inputs->facing,
inputs->a0,
inputs->dadx,
inputs->dady,
color,
depth,
mask,
&task->vis_counter);
}
/**
* Set top row and left column of the tile's pixels to white. For debugging.
*/
@ -717,10 +721,17 @@ static struct {
{
RAST(clear_color),
RAST(clear_zstencil),
RAST(triangle),
RAST(triangle_1),
RAST(triangle_2),
RAST(triangle_3),
RAST(triangle_4),
RAST(triangle_5),
RAST(triangle_6),
RAST(triangle_7),
RAST(shade_tile),
RAST(shade_tile_opaque),
RAST(set_state),
RAST(store_color),
RAST(store_linear_color),
RAST(fence),
RAST(begin_query),
RAST(end_query),
@ -775,7 +786,8 @@ is_empty_bin( const struct cmd_bin *bin )
}
for (i = 0; i < head->count; i++)
if (head->cmd[i] != lp_rast_set_state) {
if (head->cmd[i] != lp_rast_set_state &&
head->cmd[i] != lp_rast_store_linear_color) {
return FALSE;
}

View file

@ -83,9 +83,6 @@ struct lp_rast_shader_inputs {
float (*a0)[4];
float (*dadx)[4];
float (*dady)[4];
/* edge/step info for 3 edges and 4x4 block of pixels */
PIPE_ALIGN_VAR(16) int step[3][16];
};
struct lp_rast_clearzs {
@ -93,6 +90,22 @@ struct lp_rast_clearzs {
unsigned clearzs_mask;
};
struct lp_rast_plane {
/* one-pixel sized trivial accept offset for this plane; the rasterizer
 * scales it by the block size (x4 or x16) before use */
int ei;
/* one-pixel sized trivial reject offset for this plane; scaled by the
 * block size (x4 or x16) in the same way as ei */
int eo;
/* constant term of the plane equation; the rasterizer derives the value
 * at a tile origin (x, y) as c + dcdy * y - dcdx * x
 * (original note: edge function values at minx,miny ??) */
int c;
/* coefficients applied to the tile x and y position respectively when
 * evaluating the plane at the tile origin (see c above) */
int dcdx;
int dcdy;
/* step table for this plane: indexed 0..15 by the rasterizer, one entry
 * per position of a 4x4 grid (of pixels, or of sub-blocks when scaled) */
const int *step;
};
/**
* Rasterization information for a triangle known to be in this bin,
@ -101,35 +114,16 @@ struct lp_rast_clearzs {
* Objects of this type are put into the lp_setup_context::data buffer.
*/
struct lp_rast_triangle {
/* inputs for the shader */
PIPE_ALIGN_VAR(16) struct lp_rast_shader_inputs inputs;
int step[3][16];
#ifdef DEBUG
float v[3][2];
#endif
/* one-pixel sized trivial accept offsets for each plane */
int ei1;
int ei2;
int ei3;
/* one-pixel sized trivial reject offsets for each plane */
int eo1;
int eo2;
int eo3;
/* y deltas for vertex pairs (in fixed pt) */
int dy12;
int dy23;
int dy31;
/* x deltas for vertex pairs (in fixed pt) */
int dx12;
int dx23;
int dx31;
/* edge function values at minx,miny ?? */
int c1, c2, c3;
/* inputs for the shader */
PIPE_ALIGN_VAR(16) struct lp_rast_shader_inputs inputs;
struct lp_rast_plane plane[7]; /* NOTE: may allocate fewer planes */
};
@ -153,7 +147,10 @@ lp_rast_finish( struct lp_rasterizer *rast );
union lp_rast_cmd_arg {
const struct lp_rast_shader_inputs *shade_tile;
const struct lp_rast_triangle *triangle;
struct {
const struct lp_rast_triangle *tri;
unsigned plane_mask;
} triangle;
const struct lp_rast_state *set_state;
uint8_t clear_color[4];
const struct lp_rast_clearzs *clear_zstencil;
@ -173,10 +170,12 @@ lp_rast_arg_inputs( const struct lp_rast_shader_inputs *shade_tile )
}
static INLINE union lp_rast_cmd_arg
lp_rast_arg_triangle( const struct lp_rast_triangle *triangle )
lp_rast_arg_triangle( const struct lp_rast_triangle *triangle,
unsigned plane_mask)
{
union lp_rast_cmd_arg arg;
arg.triangle = triangle;
arg.triangle.tri = triangle;
arg.triangle.plane_mask = plane_mask;
return arg;
}
@ -229,16 +228,31 @@ void lp_rast_clear_zstencil( struct lp_rasterizer_task *,
void lp_rast_set_state( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_triangle( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_triangle_1( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_triangle_2( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_triangle_3( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_triangle_4( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_triangle_5( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_triangle_6( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_triangle_7( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_shade_tile( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_shade_tile_opaque( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_fence( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );
void lp_rast_store_color( struct lp_rasterizer_task *,
void lp_rast_store_linear_color( struct lp_rasterizer_task *,
const union lp_rast_cmd_arg );

View file

@ -119,10 +119,12 @@ struct lp_rasterizer
};
void lp_rast_shade_quads( struct lp_rasterizer_task *task,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y,
int32_t c1, int32_t c2, int32_t c3);
void
lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
const struct lp_rast_shader_inputs *inputs,
unsigned x, unsigned y,
unsigned mask);
/**
@ -157,6 +159,40 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
}
/**
* Get pointer to the swizzled color tile
*/
static INLINE uint8_t *
lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
                               unsigned buf, enum lp_texture_usage usage)
{
   struct lp_rasterizer *rast = task->rast;
   struct pipe_surface *surf;
   struct llvmpipe_resource *tex;
   uint8_t *tile;

   assert(task->x % TILE_SIZE == 0);
   assert(task->y % TILE_SIZE == 0);
   assert(buf < rast->state.nr_cbufs);

   /* Already looked up for this tile: hand back the cached pointer. */
   if (task->color_tiles[buf])
      return task->color_tiles[buf];

   surf = rast->curr_scene->fb.cbufs[buf];
   assert(surf);

   tex = llvmpipe_resource(surf->texture);

   /* Fetch the tile at the task's current position and cache the result
    * (even when it is NULL, so the next call retries the lookup).
    */
   tile = llvmpipe_get_texture_tile(tex,
                                    surf->face + surf->zslice,
                                    surf->level,
                                    usage,
                                    task->x,
                                    task->y);
   task->color_tiles[buf] = tile;

   if (!tile) {
      /* out of memory - use dummy tile memory */
      return lp_get_dummy_tile();
   }

   return tile;
}
/**
* Get the pointer to a 4x4 color block (within a 64x64 tile).
* We'll map the color buffer on demand here.
@ -174,6 +210,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
assert((x % TILE_VECTOR_WIDTH) == 0);
assert((y % TILE_VECTOR_HEIGHT) == 0);
color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
color = task->color_tiles[buf];
if (!color) {
/* out of memory - use dummy tile memory */
@ -217,15 +254,15 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
/* run shader on 4x4 block */
variant->jit_function[RAST_WHOLE]( &state->jit_context,
x, y,
inputs->facing,
inputs->a0,
inputs->dadx,
inputs->dady,
color,
depth,
INT_MIN, INT_MIN, INT_MIN,
NULL, NULL, NULL, &task->vis_counter );
x, y,
inputs->facing,
inputs->a0,
inputs->dadx,
inputs->dady,
color,
depth,
0xffff,
&task->vis_counter );
}

View file

@ -113,168 +113,31 @@ block_full_16(struct lp_rasterizer_task *task,
block_full_4(task, tri, x + ix, y + iy);
}
#define TAG(x) x##_1
#define NR_PLANES 1
#include "lp_rast_tri_tmp.h"
/**
* Pass the 4x4 pixel block to the shader function.
* Determination of which of the 16 pixels lies inside the triangle
* will be done as part of the fragment shader.
*/
static void
do_block_4(struct lp_rasterizer_task *task,
const struct lp_rast_triangle *tri,
int x, int y,
int c1, int c2, int c3)
{
assert(x >= 0);
assert(y >= 0);
#define TAG(x) x##_2
#define NR_PLANES 2
#include "lp_rast_tri_tmp.h"
lp_rast_shade_quads(task, &tri->inputs, x, y, -c1, -c2, -c3);
}
#define TAG(x) x##_3
#define NR_PLANES 3
#include "lp_rast_tri_tmp.h"
#define TAG(x) x##_4
#define NR_PLANES 4
#include "lp_rast_tri_tmp.h"
/**
* Evaluate a 16x16 block of pixels to determine which 4x4 subblocks are in/out
* of the triangle's bounds.
*/
static void
do_block_16(struct lp_rasterizer_task *task,
const struct lp_rast_triangle *tri,
int x, int y,
int c0, int c1, int c2)
{
unsigned mask = 0;
int eo[3];
int c[3];
int i, j;
#define TAG(x) x##_5
#define NR_PLANES 5
#include "lp_rast_tri_tmp.h"
assert(x >= 0);
assert(y >= 0);
assert(x % 16 == 0);
assert(y % 16 == 0);
#define TAG(x) x##_6
#define NR_PLANES 6
#include "lp_rast_tri_tmp.h"
eo[0] = tri->eo1 * 4;
eo[1] = tri->eo2 * 4;
eo[2] = tri->eo3 * 4;
#define TAG(x) x##_7
#define NR_PLANES 7
#include "lp_rast_tri_tmp.h"
c[0] = c0;
c[1] = c1;
c[2] = c2;
for (j = 0; j < 3; j++) {
const int *step = tri->inputs.step[j];
const int cx = c[j] + eo[j];
/* Mask has bits set whenever we are outside any of the edges.
*/
for (i = 0; i < 16; i++) {
int out = cx + step[i] * 4;
mask |= (out >> 31) & (1 << i);
}
}
mask = ~mask & 0xffff;
while (mask) {
int i = ffs(mask) - 1;
int px = x + pos_table4[i][0];
int py = y + pos_table4[i][1];
int cx1 = c0 + tri->inputs.step[0][i] * 4;
int cx2 = c1 + tri->inputs.step[1][i] * 4;
int cx3 = c2 + tri->inputs.step[2][i] * 4;
mask &= ~(1 << i);
/* Don't bother testing if the 4x4 block is entirely in/out of
* the triangle. It's a little faster to do it in the jit code.
*/
LP_COUNT(nr_non_empty_4);
do_block_4(task, tri, px, py, cx1, cx2, cx3);
}
}
/**
* Scan the tile in chunks and figure out which pixels to rasterize
* for this triangle.
*/
void
lp_rast_triangle(struct lp_rasterizer_task *task,
const union lp_rast_cmd_arg arg)
{
const struct lp_rast_triangle *tri = arg.triangle;
const int x = task->x, y = task->y;
int ei[3], eo[3], c[3];
unsigned outmask, inmask, partial_mask;
unsigned i, j;
c[0] = tri->c1 + tri->dx12 * y - tri->dy12 * x;
c[1] = tri->c2 + tri->dx23 * y - tri->dy23 * x;
c[2] = tri->c3 + tri->dx31 * y - tri->dy31 * x;
eo[0] = tri->eo1 * 16;
eo[1] = tri->eo2 * 16;
eo[2] = tri->eo3 * 16;
ei[0] = tri->ei1 * 16;
ei[1] = tri->ei2 * 16;
ei[2] = tri->ei3 * 16;
outmask = 0;
inmask = 0xffff;
for (j = 0; j < 3; j++) {
const int *step = tri->inputs.step[j];
const int cox = c[j] + eo[j];
const int cio = ei[j]- eo[j];
/* Outmask has bits set whenever we are outside any of the
* edges.
*/
/* Inmask has bits set whenever we are inside all of the edges.
*/
for (i = 0; i < 16; i++) {
int out = cox + step[i] * 16;
int in = out + cio;
outmask |= (out >> 31) & (1 << i);
inmask &= ~((in >> 31) & (1 << i));
}
}
assert((outmask & inmask) == 0);
if (outmask == 0xffff)
return;
/* Invert mask, so that bits are set whenever we are at least
* partially inside all of the edges:
*/
partial_mask = ~inmask & ~outmask & 0xffff;
/* Iterate over partials:
*/
while (partial_mask) {
int i = ffs(partial_mask) - 1;
int px = x + pos_table16[i][0];
int py = y + pos_table16[i][1];
int cx1 = c[0] + tri->inputs.step[0][i] * 16;
int cx2 = c[1] + tri->inputs.step[1][i] * 16;
int cx3 = c[2] + tri->inputs.step[2][i] * 16;
partial_mask &= ~(1 << i);
LP_COUNT(nr_partially_covered_16);
do_block_16(task, tri, px, py, cx1, cx2, cx3);
}
/* Iterate over fulls:
*/
while (inmask) {
int i = ffs(inmask) - 1;
int px = x + pos_table16[i][0];
int py = y + pos_table16[i][1];
inmask &= ~(1 << i);
LP_COUNT(nr_fully_covered_16);
block_full_16(task, tri, px, py);
}
}

View file

@ -0,0 +1,238 @@
/**************************************************************************
*
* Copyright 2007-2010 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
/*
* Rasterization for binned triangles within a tile
*/
/**
* Prototype for a 7 plane rasterizer function. Will codegenerate
* several of these.
*
* XXX: Variants for more/fewer planes.
* XXX: Need ways of dropping planes as we descend.
* XXX: SIMD
*/
static void
TAG(do_block_4)(struct lp_rasterizer_task *task,
const struct lp_rast_triangle *tri,
const struct lp_rast_plane *plane,
int x, int y,
const int *c)
{
unsigned mask = 0;
int i;
for (i = 0; i < 16; i++) {
int any_negative = 0;
int j;
for (j = 0; j < NR_PLANES; j++)
any_negative |= (c[j] - 1 + plane[j].step[i]);
any_negative >>= 31;
mask |= (~any_negative) & (1 << i);
}
/* Now pass to the shader:
*/
if (mask)
lp_rast_shade_quads_mask(task, &tri->inputs, x, y, mask);
}
/**
 * Evaluate a 16x16 block of pixels to determine which of its sixteen
 * 4x4 subblocks are in/out of the triangle's bounds.
 *
 * Subblocks outside any trivial reject plane are dropped, subblocks inside
 * every trivial accept plane go to block_full_4(), and the remainder are
 * refined per pixel by do_block_4().
 *
 * \param plane  array of NR_PLANES planes
 * \param x,y    position of the 16x16 block
 * \param c      array of NR_PLANES plane values for this block
 */
static void
TAG(do_block_16)(struct lp_rasterizer_task *task,
                 const struct lp_rast_triangle *tri,
                 const struct lp_rast_plane *plane,
                 int x, int y,
                 const int *c)
{
   unsigned rejected = 0;      /* outside one or more trivial reject planes */
   unsigned not_accepted = 0;  /* outside one or more trivial accept planes */
   unsigned full, partial;
   unsigned blk, p;

   for (p = 0; p < NR_PLANES; p++) {
      const int *step = plane[p].step;
      const int eo = plane[p].eo * 4;
      const int ei = plane[p].ei * 4;
      const int cox = c[p] + eo;
      const int cio = ei - 1 - eo;

      for (blk = 0; blk < 16; blk++) {
         const int bit = 1 << blk;
         const int out = cox + step[blk] * 4;
         const int part = out + cio;

         /* (v >> 31) is all ones for a negative v, so each line sets
          * this subblock's bit exactly when the tested value is negative.
          */
         rejected |= (out >> 31) & bit;
         not_accepted |= (part >> 31) & bit;
      }
   }

   /* Every subblock is outside at least one plane: nothing to draw. */
   if (rejected == 0xffff)
      return;

   /* Mask of sub-blocks which are inside all trivial accept planes:
    */
   full = ~not_accepted & 0xffff;

   /* Mask of sub-blocks which are inside all trivial reject planes,
    * but outside at least one trivial accept plane:
    */
   partial = not_accepted & ~rejected;

   assert((partial & full) == 0);

   /* Iterate over partials:
    */
   while (partial) {
      const int idx = ffs(partial) - 1;
      const int px = x + pos_table4[idx][0];
      const int py = y + pos_table4[idx][1];
      int cx[NR_PLANES];

      partial &= ~(1 << idx);

      /* plane values at this subblock's position */
      for (p = 0; p < NR_PLANES; p++)
         cx[p] = c[p] + plane[p].step[idx] * 4;

      TAG(do_block_4)(task, tri, plane, px, py, cx);
   }

   /* Iterate over fulls:
    */
   while (full) {
      const int idx = ffs(full) - 1;
      const int px = x + pos_table4[idx][0];
      const int py = y + pos_table4[idx][1];

      full &= ~(1 << idx);

      block_full_4(task, tri, px, py);
   }
}
/**
 * Scan the tile in chunks and figure out which pixels to rasterize
 * for this triangle.
 *
 * The planes named by arg.triangle.plane_mask are copied out of the
 * triangle, evaluated at the task's tile position, and used to sort the
 * sixteen sub-blocks addressed through pos_table16 into rejected, fully
 * covered (block_full_16) and partially covered (do_block_16) sets.
 *
 * plane_mask must have exactly NR_PLANES bits set; this is only checked
 * by an assert.
 */
void
TAG(lp_rast_triangle)(struct lp_rasterizer_task *task,
                      const union lp_rast_cmd_arg arg)
{
   const struct lp_rast_triangle *tri = arg.triangle.tri;
   unsigned plane_mask = arg.triangle.plane_mask;
   const int x = task->x, y = task->y;
   struct lp_rast_plane plane[NR_PLANES];
   int c[NR_PLANES];
   unsigned rejected = 0;      /* outside one or more trivial reject planes */
   unsigned not_accepted = 0;  /* outside one or more trivial accept planes */
   unsigned full, partial;
   unsigned blk, p, nr_planes;

   /* Gather the selected planes into a dense local array, lowest set
    * bit of plane_mask first.
    */
   for (nr_planes = 0; plane_mask; nr_planes++) {
      const int src = ffs(plane_mask) - 1;

      plane[nr_planes] = tri->plane[src];
      plane_mask &= ~(1 << src);
   }

   assert(nr_planes == NR_PLANES);

   for (p = 0; p < NR_PLANES; p++) {
      const int *step = plane[p].step;
      const int eo = plane[p].eo * 16;
      const int ei = plane[p].ei * 16;
      int cox, cio;

      /* plane value at this tile's position */
      c[p] = plane[p].c + plane[p].dcdy * y - plane[p].dcdx * x;

      cox = c[p] + eo;
      cio = ei - 1 - eo;

      for (blk = 0; blk < 16; blk++) {
         const int bit = 1 << blk;
         const int out = cox + step[blk] * 16;
         const int part = out + cio;

         /* (v >> 31) is all ones for a negative v, so each line sets
          * this sub-block's bit exactly when the tested value is negative.
          */
         rejected |= (out >> 31) & bit;
         not_accepted |= (part >> 31) & bit;
      }
   }

   /* Every sub-block is outside at least one plane: nothing to draw. */
   if (rejected == 0xffff)
      return;

   /* Mask of sub-blocks which are inside all trivial accept planes:
    */
   full = ~not_accepted & 0xffff;

   /* Mask of sub-blocks which are inside all trivial reject planes,
    * but outside at least one trivial accept plane:
    */
   partial = not_accepted & ~rejected;

   assert((partial & full) == 0);

   /* Iterate over partials:
    */
   while (partial) {
      const int idx = ffs(partial) - 1;
      const int px = x + pos_table16[idx][0];
      const int py = y + pos_table16[idx][1];
      int cx[NR_PLANES];

      partial &= ~(1 << idx);

      /* plane values at this sub-block's position */
      for (p = 0; p < NR_PLANES; p++)
         cx[p] = c[p] + plane[p].step[idx] * 16;

      LP_COUNT(nr_partially_covered_16);
      TAG(do_block_16)(task, tri, plane, px, py, cx);
   }

   /* Iterate over fulls:
    */
   while (full) {
      const int idx = ffs(full) - 1;
      const int px = x + pos_table16[idx][0];
      const int py = y + pos_table16[idx][1];

      full &= ~(1 << idx);

      LP_COUNT(nr_fully_covered_16);
      block_full_16(task, tri, px, py);
   }
}
#undef TAG
#undef NR_PLANES

View file

@ -287,7 +287,7 @@ lp_setup_flush( struct lp_setup_context *setup,
* data to linear in the texture_unmap() function, which will
* not be a parallel/threaded operation as here.
*/
lp_scene_bin_everywhere(scene, lp_rast_store_color, dummy);
lp_scene_bin_everywhere(scene, lp_rast_store_linear_color, dummy);
}
@ -752,28 +752,6 @@ lp_setup_update_state( struct lp_setup_context *setup )
setup->dirty |= LP_SETUP_NEW_FS;
}
if (setup->dirty & LP_SETUP_NEW_SCISSOR) {
float *stored;
stored = lp_scene_alloc_aligned(scene, 4 * sizeof(int32_t), 16);
if (stored) {
stored[0] = (float) setup->scissor.current.minx;
stored[1] = (float) setup->scissor.current.miny;
stored[2] = (float) setup->scissor.current.maxx;
stored[3] = (float) setup->scissor.current.maxy;
setup->scissor.stored = stored;
setup->fs.current.jit_context.scissor_xmin = stored[0];
setup->fs.current.jit_context.scissor_ymin = stored[1];
setup->fs.current.jit_context.scissor_xmax = stored[2];
setup->fs.current.jit_context.scissor_ymax = stored[3];
}
setup->dirty |= LP_SETUP_NEW_FS;
}
if(setup->dirty & LP_SETUP_NEW_CONSTANTS) {
struct pipe_resource *buffer = setup->constants.current;

View file

@ -130,7 +130,6 @@ struct lp_setup_context
struct {
struct pipe_scissor_state current;
const void *stored;
} scissor;
unsigned dirty; /**< bitmask of LP_SETUP_NEW_x bits */

View file

@ -38,12 +38,78 @@
#define NUM_CHANNELS 4
/* Per-triangle values handed to the coefficient setup helpers
 * (linear_coef, perspective_coef) instead of passing them individually. */
struct tri_info {
/* subtracted from vertex 0's x and y position when computing a0 */
float pixel_offset;
/* fixed point vertex coordinates */
int x[3];
int y[3];
/* float x,y deltas - all from the original coordinates
 * (the 01 / 20 suffixes presumably name the vertex pair - TODO confirm
 * the sign convention against the code that fills this struct)
*/
float dy01, dy20;
float dx01, dx20;
/* scale factor applied to the attribute gradients (dadx, dady);
 * presumably the reciprocal of the triangle's signed area term */
float oneoverarea;
/* per-vertex attribute arrays, indexed [attribute][component];
 * attribute 0 is read as the position, with component 3 holding 1/w */
const float (*v0)[4];
const float (*v1)[4];
const float (*v2)[4];
/* presumably the triangle's facing as passed on to the shader inputs -
 * not used by the code visible here */
boolean frontfacing;
};
/* Per-position x offset (0..3) for the 16 positions of a 4x4 block.
 * Together with step_scissor_miny the entries enumerate the block one 2x2
 * quad at a time: (0,0) (1,0) (0,1) (1,1), then the quad to its right, then
 * the two quads of the lower half.
 * NOTE(review): presumably used as the lp_rast_plane::step table for a
 * scissor plane on the minimum-x edge - confirm against the setup code. */
static const int step_scissor_minx[16] = {
0, 1, 0, 1,
2, 3, 2, 3,
0, 1, 0, 1,
2, 3, 2, 3
};
/* Element-wise negation of step_scissor_minx: minus the x offset (0..3) of
 * each of the 16 positions of a 4x4 block, in the same quad-by-quad order.
 * NOTE(review): presumably the lp_rast_plane::step table for a scissor
 * plane on the maximum-x edge - confirm against the setup code. */
static const int step_scissor_maxx[16] = {
0, -1, 0, -1,
-2, -3, -2, -3,
0, -1, 0, -1,
-2, -3, -2, -3
};
/* Per-position y offset (0..3) for the 16 positions of a 4x4 block, in the
 * same quad-by-quad order as step_scissor_minx (first eight entries cover
 * rows 0-1, last eight cover rows 2-3).
 * NOTE(review): presumably the lp_rast_plane::step table for a scissor
 * plane on the minimum-y edge - confirm against the setup code. */
static const int step_scissor_miny[16] = {
0, 0, 1, 1,
0, 0, 1, 1,
2, 2, 3, 3,
2, 2, 3, 3
};
/* Element-wise negation of step_scissor_miny: minus the y offset (0..3) of
 * each of the 16 positions of a 4x4 block, in the same quad-by-quad order.
 * NOTE(review): presumably the lp_rast_plane::step table for a scissor
 * plane on the maximum-y edge - confirm against the setup code. */
static const int step_scissor_maxy[16] = {
0, 0, -1, -1,
0, 0, -1, -1,
-2, -2, -3, -3,
-2, -2, -3, -3
};
/**
 * Convert a float coordinate to fixed point: scale by FIXED_ONE and
 * round to an integer with util_iround().
 */
static INLINE int
subpixel_snap(float a)
{
   return util_iround(a * FIXED_ONE);
}
/**
 * Convert a fixed-point value back to float by multiplying it with
 * 1.0 / FIXED_ONE (the inverse of the scaling done by subpixel_snap()).
 */
static INLINE float
fixed_to_float(int a)
{
   return (1.0 / FIXED_ONE) * a;
}
/**
* Compute a0 for a constant-valued coefficient (GL_FLAT shading).
*/
static void constant_coef( struct lp_setup_context *setup,
struct lp_rast_triangle *tri,
static void constant_coef( struct lp_rast_triangle *tri,
unsigned slot,
const float value,
unsigned i )
@ -54,28 +120,21 @@ static void constant_coef( struct lp_setup_context *setup,
}
/**
* Compute a0, dadx and dady for a linearly interpolated coefficient,
* for a triangle.
*/
static void linear_coef( struct lp_setup_context *setup,
struct lp_rast_triangle *tri,
float oneoverarea,
static void linear_coef( struct lp_rast_triangle *tri,
const struct tri_info *info,
unsigned slot,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
unsigned vert_attr,
unsigned i)
{
float a1 = v1[vert_attr][i];
float a2 = v2[vert_attr][i];
float a3 = v3[vert_attr][i];
float a0 = info->v0[vert_attr][i];
float a1 = info->v1[vert_attr][i];
float a2 = info->v2[vert_attr][i];
float da12 = a1 - a2;
float da31 = a3 - a1;
float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
float da01 = a0 - a1;
float da20 = a2 - a0;
float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea;
float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea;
tri->inputs.dadx[slot][i] = dadx;
tri->inputs.dady[slot][i] = dady;
@ -92,9 +151,9 @@ static void linear_coef( struct lp_setup_context *setup,
* to define a0 as the sample at a pixel center somewhere near vmin
* instead - i'll switch to this later.
*/
tri->inputs.a0[slot][i] = (a1 -
(dadx * (v1[0][0] - setup->pixel_offset) +
dady * (v1[0][1] - setup->pixel_offset)));
tri->inputs.a0[slot][i] = (a0 -
(dadx * (info->v0[0][0] - info->pixel_offset) +
dady * (info->v0[0][1] - info->pixel_offset)));
}
@ -106,31 +165,27 @@ static void linear_coef( struct lp_setup_context *setup,
* Later, when we compute the value at a particular fragment position we'll
* divide the interpolated value by the interpolated W at that fragment.
*/
static void perspective_coef( struct lp_setup_context *setup,
struct lp_rast_triangle *tri,
float oneoverarea,
static void perspective_coef( struct lp_rast_triangle *tri,
const struct tri_info *info,
unsigned slot,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
unsigned vert_attr,
unsigned i)
{
/* premultiply by 1/w (v[0][3] is always 1/w):
*/
float a1 = v1[vert_attr][i] * v1[0][3];
float a2 = v2[vert_attr][i] * v2[0][3];
float a3 = v3[vert_attr][i] * v3[0][3];
float da12 = a1 - a2;
float da31 = a3 - a1;
float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * oneoverarea;
float dady = (da31 * tri->dx12 - tri->dx31 * da12) * oneoverarea;
float a0 = info->v0[vert_attr][i] * info->v0[0][3];
float a1 = info->v1[vert_attr][i] * info->v1[0][3];
float a2 = info->v2[vert_attr][i] * info->v2[0][3];
float da01 = a0 - a1;
float da20 = a2 - a0;
float dadx = (da01 * info->dy20 - info->dy01 * da20) * info->oneoverarea;
float dady = (da20 * info->dx01 - info->dx20 * da01) * info->oneoverarea;
tri->inputs.dadx[slot][i] = dadx;
tri->inputs.dady[slot][i] = dady;
tri->inputs.a0[slot][i] = (a1 -
(dadx * (v1[0][0] - setup->pixel_offset) +
dady * (v1[0][1] - setup->pixel_offset)));
tri->inputs.a0[slot][i] = (a0 -
(dadx * (info->v0[0][0] - info->pixel_offset) +
dady * (info->v0[0][1] - info->pixel_offset)));
}
@ -141,13 +196,9 @@ static void perspective_coef( struct lp_setup_context *setup,
* We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
*/
static void
setup_fragcoord_coef(struct lp_setup_context *setup,
struct lp_rast_triangle *tri,
float oneoverarea,
setup_fragcoord_coef(struct lp_rast_triangle *tri,
const struct tri_info *info,
unsigned slot,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
unsigned usage_mask)
{
/*X*/
@ -166,12 +217,12 @@ setup_fragcoord_coef(struct lp_setup_context *setup,
/*Z*/
if (usage_mask & TGSI_WRITEMASK_Z) {
linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 2);
linear_coef(tri, info, slot, 0, 2);
}
/*W*/
if (usage_mask & TGSI_WRITEMASK_W) {
linear_coef(setup, tri, oneoverarea, slot, v1, v2, v3, 0, 3);
linear_coef(tri, info, slot, 0, 3);
}
}
@ -180,24 +231,23 @@ setup_fragcoord_coef(struct lp_setup_context *setup,
* Setup the fragment input attribute with the front-facing value.
* \param frontface is the triangle front facing?
*/
static void setup_facing_coef( struct lp_setup_context *setup,
struct lp_rast_triangle *tri,
static void setup_facing_coef( struct lp_rast_triangle *tri,
unsigned slot,
boolean frontface,
unsigned usage_mask)
{
/* convert TRUE to 1.0 and FALSE to -1.0 */
if (usage_mask & TGSI_WRITEMASK_X)
constant_coef( setup, tri, slot, 2.0f * frontface - 1.0f, 0 );
constant_coef( tri, slot, 2.0f * frontface - 1.0f, 0 );
if (usage_mask & TGSI_WRITEMASK_Y)
constant_coef( setup, tri, slot, 0.0f, 1 ); /* wasted */
constant_coef( tri, slot, 0.0f, 1 ); /* wasted */
if (usage_mask & TGSI_WRITEMASK_Z)
constant_coef( setup, tri, slot, 0.0f, 2 ); /* wasted */
constant_coef( tri, slot, 0.0f, 2 ); /* wasted */
if (usage_mask & TGSI_WRITEMASK_W)
constant_coef( setup, tri, slot, 0.0f, 3 ); /* wasted */
constant_coef( tri, slot, 0.0f, 3 ); /* wasted */
}
@ -206,11 +256,7 @@ static void setup_facing_coef( struct lp_setup_context *setup,
*/
static void setup_tri_coefficients( struct lp_setup_context *setup,
struct lp_rast_triangle *tri,
float oneoverarea,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
boolean frontface)
const struct tri_info *info)
{
unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
unsigned slot;
@ -227,25 +273,25 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
if (setup->flatshade_first) {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
constant_coef(setup, tri, slot+1, v1[vert_attr][i], i);
constant_coef(tri, slot+1, info->v0[vert_attr][i], i);
}
else {
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
constant_coef(setup, tri, slot+1, v3[vert_attr][i], i);
constant_coef(tri, slot+1, info->v2[vert_attr][i], i);
}
break;
case LP_INTERP_LINEAR:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
linear_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
linear_coef(tri, info, slot+1, vert_attr, i);
break;
case LP_INTERP_PERSPECTIVE:
for (i = 0; i < NUM_CHANNELS; i++)
if (usage_mask & (1 << i))
perspective_coef(setup, tri, oneoverarea, slot+1, v1, v2, v3, vert_attr, i);
perspective_coef(tri, info, slot+1, vert_attr, i);
fragcoord_usage_mask |= TGSI_WRITEMASK_W;
break;
@ -259,7 +305,7 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
break;
case LP_INTERP_FACING:
setup_facing_coef(setup, tri, slot+1, frontface, usage_mask);
setup_facing_coef(tri, slot+1, info->frontfacing, usage_mask);
break;
default:
@ -269,16 +315,11 @@ static void setup_tri_coefficients( struct lp_setup_context *setup,
/* The internal position input is in slot zero:
*/
setup_fragcoord_coef(setup, tri, oneoverarea, 0, v1, v2, v3,
fragcoord_usage_mask);
setup_fragcoord_coef(tri, info, 0, fragcoord_usage_mask);
}
static INLINE int subpixel_snap( float a )
{
return util_iround(FIXED_ONE * a - (FIXED_ONE / 2));
}
@ -291,21 +332,25 @@ static INLINE int subpixel_snap( float a )
* \return pointer to triangle space
*/
static INLINE struct lp_rast_triangle *
alloc_triangle(struct lp_scene *scene, unsigned nr_inputs, unsigned *tri_size)
alloc_triangle(struct lp_scene *scene,
unsigned nr_inputs,
unsigned nr_planes,
unsigned *tri_size)
{
unsigned input_array_sz = NUM_CHANNELS * (nr_inputs + 1) * sizeof(float);
struct lp_rast_triangle *tri;
unsigned bytes;
unsigned tri_bytes, bytes;
char *inputs;
assert(sizeof(*tri) % 16 == 0);
bytes = sizeof(*tri) + (3 * input_array_sz);
tri_bytes = align(Offset(struct lp_rast_triangle, plane[nr_planes]), 16);
bytes = tri_bytes + (3 * input_array_sz);
tri = lp_scene_alloc_aligned( scene, bytes, 16 );
if (tri) {
inputs = (char *) (tri + 1);
inputs = ((char *)tri) + tri_bytes;
tri->inputs.a0 = (float (*)[4]) inputs;
tri->inputs.dadx = (float (*)[4]) (inputs + input_array_sz);
tri->inputs.dady = (float (*)[4]) (inputs + 2 * input_array_sz);
@ -329,52 +374,71 @@ print_triangle(struct lp_setup_context *setup,
uint i;
debug_printf("llvmpipe triangle\n");
for (i = 0; i < setup->fs.nr_inputs; i++) {
for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
debug_printf(" v1[%d]: %f %f %f %f\n", i,
v1[i][0], v1[i][1], v1[i][2], v1[i][3]);
}
for (i = 0; i < setup->fs.nr_inputs; i++) {
for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
debug_printf(" v2[%d]: %f %f %f %f\n", i,
v2[i][0], v2[i][1], v2[i][2], v2[i][3]);
}
for (i = 0; i < setup->fs.nr_inputs; i++) {
for (i = 0; i < 1 + setup->fs.nr_inputs; i++) {
debug_printf(" v3[%d]: %f %f %f %f\n", i,
v3[i][0], v3[i][1], v3[i][2], v3[i][3]);
}
}
/**
 * Rasterizer entry points, indexed by the number of planes that must
 * be tested for a bin.
 *
 * do_triangle_ccw() indexes this table with nr_planes (3, or 7 when
 * scissoring is enabled) for single-tile triangles, and with
 * util_bitcount(partial) for partially covered tiles.  The latter is
 * only evaluated when 'partial' is non-zero, so slot 0 is never
 * expected to be used.
 */
lp_rast_cmd lp_rast_tri_tab[8] = {
NULL, /* should be impossible */
lp_rast_triangle_1,
lp_rast_triangle_2,
lp_rast_triangle_3,
lp_rast_triangle_4,
lp_rast_triangle_5,
lp_rast_triangle_6,
lp_rast_triangle_7
};
/**
* Do basic setup for triangle rasterization and determine which
* framebuffer tiles are touched. Put the triangle in the scene's
* bins for the tiles which we overlap.
*/
static void
static void
do_triangle_ccw(struct lp_setup_context *setup,
const float (*v1)[4],
const float (*v2)[4],
const float (*v3)[4],
boolean frontfacing )
{
/* x/y positions in fixed point */
const int x1 = subpixel_snap(v1[0][0] + 0.5 - setup->pixel_offset);
const int x2 = subpixel_snap(v2[0][0] + 0.5 - setup->pixel_offset);
const int x3 = subpixel_snap(v3[0][0] + 0.5 - setup->pixel_offset);
const int y1 = subpixel_snap(v1[0][1] + 0.5 - setup->pixel_offset);
const int y2 = subpixel_snap(v2[0][1] + 0.5 - setup->pixel_offset);
const int y3 = subpixel_snap(v3[0][1] + 0.5 - setup->pixel_offset);
struct lp_scene *scene = lp_setup_get_current_scene(setup);
struct lp_fragment_shader_variant *variant = setup->fs.current.variant;
struct lp_rast_triangle *tri;
struct tri_info info;
int area;
float oneoverarea;
int minx, maxx, miny, maxy;
int ix0, ix1, iy0, iy1;
unsigned tri_bytes;
int i;
int nr_planes = 3;
if (0)
print_triangle(setup, v1, v2, v3);
tri = alloc_triangle(scene, setup->fs.nr_inputs, &tri_bytes);
if (setup->scissor_test) {
nr_planes = 7;
}
else {
nr_planes = 3;
}
tri = alloc_triangle(scene,
setup->fs.nr_inputs,
nr_planes,
&tri_bytes);
if (!tri)
return;
@ -387,15 +451,24 @@ do_triangle_ccw(struct lp_setup_context *setup,
tri->v[2][1] = v3[0][1];
#endif
tri->dx12 = x1 - x2;
tri->dx23 = x2 - x3;
tri->dx31 = x3 - x1;
/* x/y positions in fixed point */
info.x[0] = subpixel_snap(v1[0][0] - setup->pixel_offset);
info.x[1] = subpixel_snap(v2[0][0] - setup->pixel_offset);
info.x[2] = subpixel_snap(v3[0][0] - setup->pixel_offset);
info.y[0] = subpixel_snap(v1[0][1] - setup->pixel_offset);
info.y[1] = subpixel_snap(v2[0][1] - setup->pixel_offset);
info.y[2] = subpixel_snap(v3[0][1] - setup->pixel_offset);
tri->dy12 = y1 - y2;
tri->dy23 = y2 - y3;
tri->dy31 = y3 - y1;
tri->plane[0].dcdy = info.x[0] - info.x[1];
tri->plane[1].dcdy = info.x[1] - info.x[2];
tri->plane[2].dcdy = info.x[2] - info.x[0];
area = (tri->dx12 * tri->dy31 - tri->dx31 * tri->dy12);
tri->plane[0].dcdx = info.y[0] - info.y[1];
tri->plane[1].dcdx = info.y[1] - info.y[2];
tri->plane[2].dcdx = info.y[2] - info.y[0];
area = (tri->plane[0].dcdy * tri->plane[2].dcdx -
tri->plane[2].dcdy * tri->plane[0].dcdx);
LP_COUNT(nr_tris);
@ -410,20 +483,35 @@ do_triangle_ccw(struct lp_setup_context *setup,
}
/* Bounding rectangle (in pixels) */
minx = (MIN3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
maxx = (MAX3(x1, x2, x3) + (FIXED_ONE-1)) >> FIXED_ORDER;
miny = (MIN3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
maxy = (MAX3(y1, y2, y3) + (FIXED_ONE-1)) >> FIXED_ORDER;
{
/* Yes this is necessary to accurately calculate bounding boxes
* with the two fill-conventions we support. GL (normally) ends
* up needing a bottom-left fill convention, which requires
* slightly different rounding.
*/
int adj = (setup->pixel_offset != 0) ? 1 : 0;
minx = (MIN3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
maxx = (MAX3(info.x[0], info.x[1], info.x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER;
miny = (MIN3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
maxy = (MAX3(info.y[0], info.y[1], info.y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER;
}
if (setup->scissor_test) {
minx = MAX2(minx, setup->scissor.current.minx);
maxx = MIN2(maxx, setup->scissor.current.maxx);
miny = MAX2(miny, setup->scissor.current.miny);
maxy = MIN2(maxy, setup->scissor.current.maxy);
}
else {
minx = MAX2(minx, 0);
miny = MAX2(miny, 0);
maxx = MIN2(maxx, scene->fb.width);
maxy = MIN2(maxy, scene->fb.height);
}
if (miny == maxy ||
minx == maxx) {
if (miny >= maxy || minx >= maxx) {
lp_scene_putback_data( scene, tri_bytes );
LP_COUNT(nr_culled_tris);
return;
@ -431,75 +519,87 @@ do_triangle_ccw(struct lp_setup_context *setup,
/*
*/
oneoverarea = ((float)FIXED_ONE) / (float)area;
info.pixel_offset = setup->pixel_offset;
info.v0 = v1;
info.v1 = v2;
info.v2 = v3;
info.dx01 = info.v0[0][0] - info.v1[0][0];
info.dx20 = info.v2[0][0] - info.v0[0][0];
info.dy01 = info.v0[0][1] - info.v1[0][1];
info.dy20 = info.v2[0][1] - info.v0[0][1];
info.oneoverarea = 1.0 / (info.dx01 * info.dy20 - info.dx20 * info.dy01);
info.frontfacing = frontfacing;
/* Setup parameter interpolants:
*/
setup_tri_coefficients( setup, tri, oneoverarea, v1, v2, v3, frontfacing );
setup_tri_coefficients( setup, tri, &info );
tri->inputs.facing = frontfacing ? 1.0F : -1.0F;
/* half-edge constants, will be interated over the whole render target.
*/
tri->c1 = tri->dy12 * x1 - tri->dx12 * y1;
tri->c2 = tri->dy23 * x2 - tri->dx23 * y2;
tri->c3 = tri->dy31 * x3 - tri->dx31 * y3;
/* correct for top-left fill convention:
*/
if (tri->dy12 < 0 || (tri->dy12 == 0 && tri->dx12 > 0)) tri->c1++;
if (tri->dy23 < 0 || (tri->dy23 == 0 && tri->dx23 > 0)) tri->c2++;
if (tri->dy31 < 0 || (tri->dy31 == 0 && tri->dx31 > 0)) tri->c3++;
for (i = 0; i < 3; i++) {
struct lp_rast_plane *plane = &tri->plane[i];
tri->dy12 *= FIXED_ONE;
tri->dy23 *= FIXED_ONE;
tri->dy31 *= FIXED_ONE;
/* half-edge constants, will be iterated over the whole render
* target.
*/
plane->c = plane->dcdx * info.x[i] - plane->dcdy * info.y[i];
tri->dx12 *= FIXED_ONE;
tri->dx23 *= FIXED_ONE;
tri->dx31 *= FIXED_ONE;
/* correct for top-left vs. bottom-left fill convention.
*
* note that we're overloading gl_rasterization_rules to mean
* both (0.5,0.5) pixel centers *and* bottom-left filling
* convention.
*
* GL actually has a top-left filling convention, but GL's
* notion of "top" differs from gallium's...
*
* Also, sometimes (in FBO cases) GL will render upside down
* to its usual method, in which case it will probably want
* to use the opposite, top-left convention.
*/
if (plane->dcdx < 0) {
/* both fill conventions want this - adjust for left edges */
plane->c++;
}
else if (plane->dcdx == 0) {
if (setup->pixel_offset == 0) {
/* correct for top-left fill convention:
*/
if (plane->dcdy > 0) plane->c++;
}
else {
/* correct for bottom-left fill convention:
*/
if (plane->dcdy < 0) plane->c++;
}
}
/* find trivial reject offsets for each edge for a single-pixel
* sized block. These will be scaled up at each recursive level to
* match the active blocksize. Scaling in this way works best if
* the blocks are square.
*/
tri->eo1 = 0;
if (tri->dy12 < 0) tri->eo1 -= tri->dy12;
if (tri->dx12 > 0) tri->eo1 += tri->dx12;
plane->dcdx *= FIXED_ONE;
plane->dcdy *= FIXED_ONE;
tri->eo2 = 0;
if (tri->dy23 < 0) tri->eo2 -= tri->dy23;
if (tri->dx23 > 0) tri->eo2 += tri->dx23;
/* find trivial reject offsets for each edge for a single-pixel
* sized block. These will be scaled up at each recursive level to
* match the active blocksize. Scaling in this way works best if
* the blocks are square.
*/
plane->eo = 0;
if (plane->dcdx < 0) plane->eo -= plane->dcdx;
if (plane->dcdy > 0) plane->eo += plane->dcdy;
tri->eo3 = 0;
if (tri->dy31 < 0) tri->eo3 -= tri->dy31;
if (tri->dx31 > 0) tri->eo3 += tri->dx31;
/* Calculate trivial accept offsets from the above.
*/
plane->ei = plane->dcdy - plane->dcdx - plane->eo;
/* Calculate trivial accept offsets from the above.
*/
tri->ei1 = tri->dx12 - tri->dy12 - tri->eo1;
tri->ei2 = tri->dx23 - tri->dy23 - tri->eo2;
tri->ei3 = tri->dx31 - tri->dy31 - tri->eo3;
/* Fill in the inputs.step[][] arrays.
* We've manually unrolled some loops here.
*/
{
const int xstep1 = -tri->dy12;
const int xstep2 = -tri->dy23;
const int xstep3 = -tri->dy31;
const int ystep1 = tri->dx12;
const int ystep2 = tri->dx23;
const int ystep3 = tri->dx31;
#define SETUP_STEP(i, x, y) \
do { \
tri->inputs.step[0][i] = x * xstep1 + y * ystep1; \
tri->inputs.step[1][i] = x * xstep2 + y * ystep2; \
tri->inputs.step[2][i] = x * xstep3 + y * ystep3; \
} while (0)
plane->step = tri->step[i];
/* Fill in the inputs.step[][] arrays.
* We've manually unrolled some loops here.
*/
#define SETUP_STEP(j, x, y) \
tri->step[i][j] = y * plane->dcdy - x * plane->dcdx
SETUP_STEP(0, 0, 0);
SETUP_STEP(1, 1, 0);
SETUP_STEP(2, 0, 1);
@ -522,63 +622,106 @@ do_triangle_ccw(struct lp_setup_context *setup,
#undef STEP
}
/*
* When rasterizing scissored tris, use the intersection of the
* triangle bounding box and the scissor rect to generate the
* scissor planes.
*
* This permits us to cut off the triangle "tails" that are present
* in the intermediate recursive levels caused when two of the
* triangles edges don't diverge quickly enough to trivially reject
* exterior blocks from the triangle.
*
* It's not really clear if it's worth worrying about these tails,
* but since we generate the planes for each scissored tri, it's
* free to trim them in this case.
*
* Note that otherwise, the scissor planes only vary in 'C' value,
* and even then only on state-changes. Could alternatively store
* these planes elsewhere.
*/
if (nr_planes == 7) {
tri->plane[3].step = step_scissor_minx;
tri->plane[3].dcdx = -1;
tri->plane[3].dcdy = 0;
tri->plane[3].c = 1-minx;
tri->plane[3].ei = 0;
tri->plane[3].eo = 1;
tri->plane[4].step = step_scissor_maxx;
tri->plane[4].dcdx = 1;
tri->plane[4].dcdy = 0;
tri->plane[4].c = maxx;
tri->plane[4].ei = -1;
tri->plane[4].eo = 0;
tri->plane[5].step = step_scissor_miny;
tri->plane[5].dcdx = 0;
tri->plane[5].dcdy = 1;
tri->plane[5].c = 1-miny;
tri->plane[5].ei = 0;
tri->plane[5].eo = 1;
tri->plane[6].step = step_scissor_maxy;
tri->plane[6].dcdx = 0;
tri->plane[6].dcdy = -1;
tri->plane[6].c = maxy;
tri->plane[6].ei = -1;
tri->plane[6].eo = 0;
}
/*
* All fields of 'tri' are now set. The remaining code here is
* concerned with binning.
*/
/* Convert to tile coordinates:
/* Convert to tile coordinates, and inclusive ranges:
*/
minx = minx / TILE_SIZE;
miny = miny / TILE_SIZE;
maxx = maxx / TILE_SIZE;
maxy = maxy / TILE_SIZE;
ix0 = minx / TILE_SIZE;
iy0 = miny / TILE_SIZE;
ix1 = (maxx-1) / TILE_SIZE;
iy1 = (maxy-1) / TILE_SIZE;
/*
* Clamp to framebuffer size
*/
minx = MAX2(minx, 0);
miny = MAX2(miny, 0);
maxx = MIN2(maxx, scene->tiles_x - 1);
maxy = MIN2(maxy, scene->tiles_y - 1);
assert(ix0 == MAX2(ix0, 0));
assert(iy0 == MAX2(iy0, 0));
assert(ix1 == MIN2(ix1, scene->tiles_x - 1));
assert(iy1 == MIN2(iy1, scene->tiles_y - 1));
/* Determine which tile(s) intersect the triangle's bounding box
*/
if (miny == maxy && minx == maxx)
if (iy0 == iy1 && ix0 == ix1)
{
/* Triangle is contained in a single tile:
*/
lp_scene_bin_command( scene, minx, miny, lp_rast_triangle,
lp_rast_arg_triangle(tri) );
lp_scene_bin_command( scene, ix0, iy0,
lp_rast_tri_tab[nr_planes],
lp_rast_arg_triangle(tri, (1<<nr_planes)-1) );
}
else
else
{
int c1 = (tri->c1 +
tri->dx12 * miny * TILE_SIZE -
tri->dy12 * minx * TILE_SIZE);
int c2 = (tri->c2 +
tri->dx23 * miny * TILE_SIZE -
tri->dy23 * minx * TILE_SIZE);
int c3 = (tri->c3 +
tri->dx31 * miny * TILE_SIZE -
tri->dy31 * minx * TILE_SIZE);
int ei1 = tri->ei1 << TILE_ORDER;
int ei2 = tri->ei2 << TILE_ORDER;
int ei3 = tri->ei3 << TILE_ORDER;
int eo1 = tri->eo1 << TILE_ORDER;
int eo2 = tri->eo2 << TILE_ORDER;
int eo3 = tri->eo3 << TILE_ORDER;
int xstep1 = -(tri->dy12 << TILE_ORDER);
int xstep2 = -(tri->dy23 << TILE_ORDER);
int xstep3 = -(tri->dy31 << TILE_ORDER);
int ystep1 = tri->dx12 << TILE_ORDER;
int ystep2 = tri->dx23 << TILE_ORDER;
int ystep3 = tri->dx31 << TILE_ORDER;
int c[7];
int ei[7];
int eo[7];
int xstep[7];
int ystep[7];
int x, y;
for (i = 0; i < nr_planes; i++) {
c[i] = (tri->plane[i].c +
tri->plane[i].dcdy * iy0 * TILE_SIZE -
tri->plane[i].dcdx * ix0 * TILE_SIZE);
ei[i] = tri->plane[i].ei << TILE_ORDER;
eo[i] = tri->plane[i].eo << TILE_ORDER;
xstep[i] = -(tri->plane[i].dcdx << TILE_ORDER);
ystep[i] = tri->plane[i].dcdy << TILE_ORDER;
}
/* Test tile-sized blocks against the triangle.
@ -586,32 +729,49 @@ do_triangle_ccw(struct lp_setup_context *setup,
* contained inside the tri, bin an lp_rast_shade_tile command.
* Else, bin a lp_rast_triangle command.
*/
for (y = miny; y <= maxy; y++)
for (y = iy0; y <= iy1; y++)
{
int cx1 = c1;
int cx2 = c2;
int cx3 = c3;
boolean in = FALSE; /* are we inside the triangle? */
int cx[7];
for (x = minx; x <= maxx; x++)
for (i = 0; i < nr_planes; i++)
cx[i] = c[i];
for (x = ix0; x <= ix1; x++)
{
if (cx1 + eo1 < 0 ||
cx2 + eo2 < 0 ||
cx3 + eo3 < 0)
{
/* do nothing */
int out = 0;
int partial = 0;
for (i = 0; i < nr_planes; i++) {
int planeout = cx[i] + eo[i];
int planepartial = cx[i] + ei[i] - 1;
out |= (planeout >> 31);
partial |= (planepartial >> 31) & (1<<i);
}
if (out) {
/* do nothing */
if (in)
break; /* exiting triangle, all done with this row */
LP_COUNT(nr_empty_64);
if (in)
break; /* exiting triangle, all done with this row */
}
else if (cx1 + ei1 > 0 &&
cx2 + ei2 > 0 &&
cx3 + ei3 > 0)
{
}
else if (partial) {
/* Not trivially accepted by at least one plane -
* rasterize/shade partial tile
*/
int count = util_bitcount(partial);
in = TRUE;
lp_scene_bin_command( scene, x, y,
lp_rast_tri_tab[count],
lp_rast_arg_triangle(tri, partial) );
LP_COUNT(nr_partially_covered_64);
}
else {
/* triangle covers the whole tile- shade whole tile */
LP_COUNT(nr_fully_covered_64);
in = TRUE;
if (setup->fs.current.variant->opaque &&
in = TRUE;
if (variant->opaque &&
!setup->fb.zsbuf) {
lp_scene_bin_reset( scene, x, y );
lp_scene_bin_command( scene, x, y,
@ -621,29 +781,18 @@ do_triangle_ccw(struct lp_setup_context *setup,
lp_scene_bin_command( scene, x, y,
lp_rast_shade_tile,
lp_rast_arg_inputs(&tri->inputs) );
}
else
{
/* rasterizer/shade partial tile */
LP_COUNT(nr_partially_covered_64);
in = TRUE;
lp_scene_bin_command( scene, x, y,
lp_rast_triangle,
lp_rast_arg_triangle(tri) );
}
}
/* Iterate cx values across the region:
*/
cx1 += xstep1;
cx2 += xstep2;
cx3 += xstep3;
for (i = 0; i < nr_planes; i++)
cx[i] += xstep[i];
}
/* Iterate c values down the region:
*/
c1 += ystep1;
c2 += ystep2;
c3 += ystep3;
for (i = 0; i < nr_planes; i++)
c[i] += ystep[i];
}
}
}

View file

@ -31,9 +31,6 @@
* Code generate the whole fragment pipeline.
*
* The fragment pipeline consists of the following stages:
* - triangle edge in/out testing
* - scissor test
* - stipple (TBI)
* - early depth test
* - fragment shader
* - alpha test
@ -97,6 +94,7 @@
#include "lp_state.h"
#include "lp_tex_sample.h"
#include "lp_flush.h"
#include "lp_state_fs.h"
#include <llvm-c/Analysis.h>
@ -170,177 +168,63 @@ generate_depth_stencil(LLVMBuilderRef builder,
/**
* Generate the code to do inside/outside triangle testing for the
* Expand the relevant bits of mask_input to a 4-dword mask for the
* four pixels in a 2x2 quad. This will set the four elements of the
* quad mask vector to 0 or ~0.
* \param i which quad of the quad group to test, in [0,3]
*
* \param quad which quad of the quad group to test, in [0,3]
* \param mask_input bitwise mask for the whole 4x4 stamp
*/
static void
generate_tri_edge_mask(LLVMBuilderRef builder,
unsigned i,
LLVMValueRef *mask, /* ivec4, out */
LLVMValueRef c0, /* int32 */
LLVMValueRef c1, /* int32 */
LLVMValueRef c2, /* int32 */
LLVMValueRef step0_ptr, /* ivec4 */
LLVMValueRef step1_ptr, /* ivec4 */
LLVMValueRef step2_ptr) /* ivec4 */
static LLVMValueRef
generate_quad_mask(LLVMBuilderRef builder,
struct lp_type fs_type,
unsigned quad,
LLVMValueRef mask_input) /* int32 */
{
#define OPTIMIZE_IN_OUT_TEST 0
#if OPTIMIZE_IN_OUT_TEST
struct lp_build_if_state ifctx;
LLVMValueRef not_draw_all;
#endif
struct lp_build_flow_context *flow;
struct lp_type i32_type;
LLVMTypeRef i32vec4_type;
LLVMValueRef c0_vec, c1_vec, c2_vec;
LLVMValueRef in_out_mask;
assert(i < 4);
/* int32 vector type */
memset(&i32_type, 0, sizeof i32_type);
i32_type.floating = FALSE; /* values are integers */
i32_type.sign = TRUE; /* values are signed */
i32_type.norm = FALSE; /* values are not normalized */
i32_type.width = 32; /* 32-bit int values */
i32_type.length = 4; /* 4 elements per vector */
i32vec4_type = lp_build_int32_vec4_type();
struct lp_type mask_type;
LLVMTypeRef i32t = LLVMInt32Type();
LLVMValueRef bits[4];
LLVMValueRef mask;
/*
* Use a conditional here to do detailed pixel in/out testing.
* We only have to do this if c0 != INT_MIN.
* XXX: We'll need a different path for 16 x u8
*/
flow = lp_build_flow_create(builder);
lp_build_flow_scope_begin(flow);
assert(fs_type.width == 32);
assert(fs_type.length == 4);
mask_type = lp_int_type(fs_type);
{
#if OPTIMIZE_IN_OUT_TEST
/* not_draw_all = (c0 != INT_MIN) */
not_draw_all = LLVMBuildICmp(builder,
LLVMIntNE,
c0,
LLVMConstInt(LLVMInt32Type(), INT_MIN, 0),
"");
in_out_mask = lp_build_const_int_vec(i32_type, ~0);
lp_build_flow_scope_declare(flow, &in_out_mask);
/* if (not_draw_all) {... */
lp_build_if(&ifctx, flow, builder, not_draw_all);
#endif
{
LLVMValueRef step0_vec, step1_vec, step2_vec;
LLVMValueRef m0_vec, m1_vec, m2_vec;
LLVMValueRef index, m;
/* c0_vec = {c0, c0, c0, c0}
* Note that we emit this code four times but LLVM optimizes away
* three instances of it.
*/
c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
lp_build_name(c0_vec, "edgeconst0vec");
lp_build_name(c1_vec, "edgeconst1vec");
lp_build_name(c2_vec, "edgeconst2vec");
/* load step0vec, step1, step2 vec from memory */
index = LLVMConstInt(LLVMInt32Type(), i, 0);
step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
lp_build_name(step0_vec, "step0vec");
lp_build_name(step1_vec, "step1vec");
lp_build_name(step2_vec, "step2vec");
/* m0_vec = step0_ptr[i] > c0_vec */
m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);
/* in_out_mask = m0_vec & m1_vec & m2_vec */
m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
in_out_mask = LLVMBuildAnd(builder, m, m2_vec, "");
lp_build_name(in_out_mask, "inoutmaskvec");
}
#if OPTIMIZE_IN_OUT_TEST
lp_build_endif(&ifctx);
#endif
}
lp_build_flow_scope_end(flow);
lp_build_flow_destroy(flow);
/* This is the initial alive/dead pixel mask for a quad of four pixels.
* It's an int[4] vector with each word set to 0 or ~0.
* Words will get cleared when pixels faile the Z test, etc.
/*
* mask_input >>= (quad * 4)
*/
*mask = in_out_mask;
}
mask_input = LLVMBuildLShr(builder,
mask_input,
LLVMConstInt(i32t, quad * 4, 0),
"");
static LLVMValueRef
generate_scissor_test(LLVMBuilderRef builder,
LLVMValueRef context_ptr,
const struct lp_build_interp_soa_context *interp,
struct lp_type type)
{
LLVMTypeRef vec_type = lp_build_vec_type(type);
LLVMValueRef xpos = interp->pos[0], ypos = interp->pos[1];
LLVMValueRef xmin, ymin, xmax, ymax;
LLVMValueRef m0, m1, m2, m3, m;
/*
* mask = { mask_input & (1 << i), for i in [0,3] }
*/
/* xpos, ypos contain the window coords for the four pixels in the quad */
assert(xpos);
assert(ypos);
mask = lp_build_broadcast(builder, lp_build_vec_type(mask_type), mask_input);
/* get the current scissor bounds, convert to vectors */
xmin = lp_jit_context_scissor_xmin_value(builder, context_ptr);
xmin = lp_build_broadcast(builder, vec_type, xmin);
bits[0] = LLVMConstInt(i32t, 1 << 0, 0);
bits[1] = LLVMConstInt(i32t, 1 << 1, 0);
bits[2] = LLVMConstInt(i32t, 1 << 2, 0);
bits[3] = LLVMConstInt(i32t, 1 << 3, 0);
ymin = lp_jit_context_scissor_ymin_value(builder, context_ptr);
ymin = lp_build_broadcast(builder, vec_type, ymin);
mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, 4), "");
xmax = lp_jit_context_scissor_xmax_value(builder, context_ptr);
xmax = lp_build_broadcast(builder, vec_type, xmax);
/*
* mask = mask != 0 ? ~0 : 0
*/
ymax = lp_jit_context_scissor_ymax_value(builder, context_ptr);
ymax = lp_build_broadcast(builder, vec_type, ymax);
mask = lp_build_compare(builder,
mask_type, PIPE_FUNC_NOTEQUAL,
mask,
lp_build_const_int_vec(mask_type, 0));
/* compare the fragment's position coordinates against the scissor bounds */
m0 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, xpos, xmin);
m1 = lp_build_compare(builder, type, PIPE_FUNC_GEQUAL, ypos, ymin);
m2 = lp_build_compare(builder, type, PIPE_FUNC_LESS, xpos, xmax);
m3 = lp_build_compare(builder, type, PIPE_FUNC_LESS, ypos, ymax);
/* AND all the masks together */
m = LLVMBuildAnd(builder, m0, m1, "");
m = LLVMBuildAnd(builder, m, m2, "");
m = LLVMBuildAnd(builder, m, m3, "");
lp_build_name(m, "scissormask");
return m;
}
static LLVMValueRef
build_int32_vec_const(int value)
{
struct lp_type i32_type;
memset(&i32_type, 0, sizeof i32_type);
i32_type.floating = FALSE; /* values are integers */
i32_type.sign = TRUE; /* values are signed */
i32_type.norm = FALSE; /* values are not normalized */
i32_type.width = 32; /* 32-bit int values */
i32_type.length = 4; /* 4 elements per vector */
return lp_build_const_int_vec(i32_type, value);
return mask;
}
@ -348,7 +232,7 @@ build_int32_vec_const(int value)
/**
* Generate the fragment shader, depth/stencil test, and alpha tests.
* \param i which quad in the tile, in range [0,3]
* \param do_tri_test if 1, do triangle edge in/out testing
* \param partial_mask if 1, do mask_input testing
*/
static void
generate_fs(struct llvmpipe_context *lp,
@ -364,13 +248,8 @@ generate_fs(struct llvmpipe_context *lp,
LLVMValueRef (*color)[4],
LLVMValueRef depth_ptr,
LLVMValueRef facing,
unsigned do_tri_test,
LLVMValueRef c0,
LLVMValueRef c1,
LLVMValueRef c2,
LLVMValueRef step0_ptr,
LLVMValueRef step1_ptr,
LLVMValueRef step2_ptr,
unsigned partial_mask,
LLVMValueRef mask_input,
LLVMValueRef counter)
{
const struct tgsi_token *tokens = shader->base.tokens;
@ -411,23 +290,17 @@ generate_fs(struct llvmpipe_context *lp,
lp_build_flow_scope_declare(flow, &z);
/* do triangle edge testing */
if (do_tri_test) {
generate_tri_edge_mask(builder, i, pmask,
c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
if (partial_mask) {
*pmask = generate_quad_mask(builder, type,
i, mask_input);
}
else {
*pmask = build_int32_vec_const(~0);
*pmask = lp_build_const_int_vec(type, ~0);
}
/* 'mask' will control execution based on quad's pixel alive/killed state */
lp_build_mask_begin(&mask, flow, type, *pmask);
if (key->scissor) {
LLVMValueRef smask =
generate_scissor_test(builder, context_ptr, interp, type);
lp_build_mask_update(&mask, smask);
}
early_depth_stencil_test =
(key->depth.enabled || key->stencil[0].enabled) &&
!key->alpha.enabled &&
@ -579,7 +452,7 @@ static void
generate_fragment(struct llvmpipe_context *lp,
struct lp_fragment_shader *shader,
struct lp_fragment_shader_variant *variant,
unsigned do_tri_test)
unsigned partial_mask)
{
struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
const struct lp_fragment_shader_variant_key *key = &variant->key;
@ -589,9 +462,8 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMTypeRef fs_elem_type;
LLVMTypeRef fs_int_vec_type;
LLVMTypeRef blend_vec_type;
LLVMTypeRef arg_types[16];
LLVMTypeRef arg_types[11];
LLVMTypeRef func_type;
LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type();
LLVMValueRef context_ptr;
LLVMValueRef x;
LLVMValueRef y;
@ -600,7 +472,8 @@ generate_fragment(struct llvmpipe_context *lp,
LLVMValueRef dady_ptr;
LLVMValueRef color_ptr_ptr;
LLVMValueRef depth_ptr;
LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr, counter = NULL;
LLVMValueRef mask_input;
LLVMValueRef counter = NULL;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
struct lp_build_sampler_soa *sampler;
@ -645,7 +518,7 @@ generate_fragment(struct llvmpipe_context *lp,
blend_vec_type = lp_build_vec_type(blend_type);
util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
shader->no, variant->no, do_tri_test ? "edge" : "whole");
shader->no, variant->no, partial_mask ? "partial" : "whole");
arg_types[0] = screen->context_ptr_type; /* context */
arg_types[1] = LLVMInt32Type(); /* x */
@ -656,23 +529,15 @@ generate_fragment(struct llvmpipe_context *lp,
arg_types[6] = LLVMPointerType(fs_elem_type, 0); /* dady */
arg_types[7] = LLVMPointerType(LLVMPointerType(blend_vec_type, 0), 0); /* color */
arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
arg_types[9] = LLVMInt32Type(); /* c0 */
arg_types[10] = LLVMInt32Type(); /* c1 */
arg_types[11] = LLVMInt32Type(); /* c2 */
/* Note: the step arrays are built as int32[16] but we interpret
* them here as int32_vec4[4].
*/
arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step0 */
arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step1 */
arg_types[14] = LLVMPointerType(int32_vec4_type, 0);/* step2 */
arg_types[15] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */
arg_types[9] = LLVMInt32Type(); /* mask_input */
arg_types[10] = LLVMPointerType(LLVMInt32Type(), 0);/* counter */
func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
function = LLVMAddFunction(screen->module, func_name, func_type);
LLVMSetFunctionCallConv(function, LLVMCCallConv);
variant->function[do_tri_test] = function;
variant->function[partial_mask] = function;
/* XXX: need to propagate noalias down into color param now we are
@ -691,12 +556,7 @@ generate_fragment(struct llvmpipe_context *lp,
dady_ptr = LLVMGetParam(function, 6);
color_ptr_ptr = LLVMGetParam(function, 7);
depth_ptr = LLVMGetParam(function, 8);
c0 = LLVMGetParam(function, 9);
c1 = LLVMGetParam(function, 10);
c2 = LLVMGetParam(function, 11);
step0_ptr = LLVMGetParam(function, 12);
step1_ptr = LLVMGetParam(function, 13);
step2_ptr = LLVMGetParam(function, 14);
mask_input = LLVMGetParam(function, 9);
lp_build_name(context_ptr, "context");
lp_build_name(x, "x");
@ -706,15 +566,10 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(dady_ptr, "dady");
lp_build_name(color_ptr_ptr, "color_ptr_ptr");
lp_build_name(depth_ptr, "depth");
lp_build_name(c0, "c0");
lp_build_name(c1, "c1");
lp_build_name(c2, "c2");
lp_build_name(step0_ptr, "step0");
lp_build_name(step1_ptr, "step1");
lp_build_name(step2_ptr, "step2");
lp_build_name(mask_input, "mask_input");
if (key->occlusion_count) {
counter = LLVMGetParam(function, 15);
counter = LLVMGetParam(function, 10);
lp_build_name(counter, "counter");
}
@ -763,9 +618,9 @@ generate_fragment(struct llvmpipe_context *lp,
out_color,
depth_ptr_i,
facing,
do_tri_test,
c0, c1, c2,
step0_ptr, step1_ptr, step2_ptr, counter);
partial_mask,
mask_input,
counter);
for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++)
for(chan = 0; chan < NUM_CHANNELS; ++chan)
@ -792,9 +647,13 @@ generate_fragment(struct llvmpipe_context *lp,
lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
}
lp_build_conv_mask(builder, fs_type, blend_type,
fs_mask, num_fs,
&blend_mask, 1);
if (partial_mask || !variant->opaque) {
lp_build_conv_mask(builder, fs_type, blend_type,
fs_mask, num_fs,
&blend_mask, 1);
} else {
blend_mask = lp_build_const_int_vec(blend_type, ~0);
}
color_ptr = LLVMBuildLoad(builder,
LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
@ -832,8 +691,7 @@ generate_fragment(struct llvmpipe_context *lp,
#endif
/* Apply optimizations to LLVM IR */
if (1)
LLVMRunFunctionPassManager(screen->pass, function);
LLVMRunFunctionPassManager(screen->pass, function);
if (gallivm_debug & GALLIVM_DEBUG_IR) {
/* Print the LLVM IR to stderr */
@ -847,7 +705,7 @@ generate_fragment(struct llvmpipe_context *lp,
{
void *f = LLVMGetPointerToGlobal(screen->engine, function);
variant->jit_function[do_tri_test] = (lp_jit_frag_func)pointer_to_func(f);
variant->jit_function[partial_mask] = (lp_jit_frag_func)pointer_to_func(f);
if (gallivm_debug & GALLIVM_DEBUG_ASM) {
lp_disassemble(f);
@ -963,7 +821,6 @@ generate_variant(struct llvmpipe_context *lp,
!key->stencil[0].enabled &&
!key->alpha.enabled &&
!key->depth.enabled &&
!key->scissor &&
!shader->info.uses_kill
? TRUE : FALSE;
@ -1182,7 +1039,6 @@ make_variant_key(struct llvmpipe_context *lp,
/* alpha.ref_value is passed in jit_context */
key->flatshade = lp->rasterizer->flatshade;
key->scissor = lp->rasterizer->scissor;
if (lp->active_query_count) {
key->occlusion_count = TRUE;
}

View file

@ -54,7 +54,6 @@ struct lp_fragment_shader_variant_key
enum pipe_format zsbuf_format;
unsigned nr_cbufs:8;
unsigned flatshade:1;
unsigned scissor:1;
unsigned occlusion_count:1;
struct {