llvmpipe: improve rasterization discard logic

This unifies the explicit rasterization discard with the implicit
rasterization-disabled logic (which we need for another state tracker);
the two really should do the exact same thing.
We'll now toss out the prims early on in setup with (implicit or
explicit) discard, rather than do setup and binning with them, which
was entirely pointless.
(We should eventually get rid of implicit discard, which should also
enable us to discard stuff already in draw, hence draw would be
able to skip the pointless clip and fallback stages in this case.)
We still need separate logic for the null-ps-only case - this is not the
same as rasterization discard. But simplify the logic there: simply don't
count ps invocations whenever there's an empty fs, regardless of
depth/stencil tests, which seems perfectly acceptable by d3d10.
While here, also fix statistics for primitives if face culling is
enabled.
No piglit changes.

Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
This commit is contained in:
Roland Scheidegger 2018-05-22 02:12:38 +02:00
parent 047438287c
commit 7b89fcec41
15 changed files with 118 additions and 89 deletions

View file

@ -136,7 +136,6 @@ struct llvmpipe_context {
struct blitter_context *blitter;
unsigned tex_timestamp;
boolean no_rast;
/** List of all fragment shader variants */
struct lp_fs_variant_list_item fs_variants_list;

View file

@ -212,6 +212,7 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
elem_types[LP_JIT_THREAD_DATA_CACHE] =
LLVMPointerType(lp_build_format_cache_type(gallivm), 0);
elem_types[LP_JIT_THREAD_DATA_COUNTER] = LLVMInt64TypeInContext(lc);
elem_types[LP_JIT_THREAD_DATA_INVOCATIONS] = LLVMInt64TypeInContext(lc);
elem_types[LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX] =
LLVMInt32TypeInContext(lc);

View file

@ -192,6 +192,7 @@ struct lp_jit_thread_data
{
struct lp_build_format_cache *cache;
uint64_t vis_counter;
uint64_t ps_invocations;
/*
* Non-interpolated rasterizer state passed through to the fragment shader.
@ -205,6 +206,7 @@ struct lp_jit_thread_data
enum {
LP_JIT_THREAD_DATA_CACHE = 0,
LP_JIT_THREAD_DATA_COUNTER,
LP_JIT_THREAD_DATA_INVOCATIONS,
LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX,
LP_JIT_THREAD_DATA_COUNT
};
@ -216,6 +218,9 @@ enum {
#define lp_jit_thread_data_counter(_gallivm, _ptr) \
lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_COUNTER, "counter")
#define lp_jit_thread_data_invocations(_gallivm, _ptr) \
lp_build_struct_get_ptr(_gallivm, _ptr, LP_JIT_THREAD_DATA_INVOCATIONS, "invocs")
#define lp_jit_thread_data_raster_state_viewport_index(_gallivm, _ptr) \
lp_build_struct_get(_gallivm, _ptr, \
LP_JIT_THREAD_DATA_RASTER_STATE_VIEWPORT_INDEX, \

View file

@ -107,7 +107,7 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
task->scene->fb.height - y * TILE_SIZE : TILE_SIZE;
task->thread_data.vis_counter = 0;
task->ps_invocations = 0;
task->thread_data.ps_invocations = 0;
for (i = 0; i < task->scene->fb.nr_cbufs; i++) {
if (task->scene->fb.cbufs[i]) {
@ -446,10 +446,6 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
* allocated 4x4 blocks hence need to filter them out here.
*/
if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
/* not very accurate would need a popcount on the mask */
/* always count this not worth bothering? */
task->ps_invocations += 1 * variant->ps_inv_multiplier;
/* Propagate non-interpolated raster state. */
task->thread_data.raster_state.viewport_index = inputs->viewport_index;
@ -491,7 +487,7 @@ lp_rast_begin_query(struct lp_rasterizer_task *task,
pq->start[task->thread_index] = task->thread_data.vis_counter;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
pq->start[task->thread_index] = task->ps_invocations;
pq->start[task->thread_index] = task->thread_data.ps_invocations;
break;
default:
assert(0);
@ -524,7 +520,7 @@ lp_rast_end_query(struct lp_rasterizer_task *task,
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
pq->end[task->thread_index] +=
task->ps_invocations - pq->start[task->thread_index];
task->thread_data.ps_invocations - pq->start[task->thread_index];
pq->start[task->thread_index] = 0;
break;
default:
@ -679,7 +675,7 @@ rasterize_scene(struct lp_rasterizer_task *task,
#endif
#endif
if (!task->rast->no_rast && !scene->discard) {
if (!task->rast->no_rast) {
/* loop over scene bins, rasterize each */
{
struct cmd_bin *bin;

View file

@ -99,8 +99,6 @@ struct lp_rasterizer_task
/** Non-interpolated passthru state and occlude counter for visible pixels */
struct lp_jit_thread_data thread_data;
uint64_t ps_invocations;
uint8_t ps_inv_multiplier;
pipe_semaphore work_ready;
pipe_semaphore work_done;
@ -259,10 +257,6 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
* allocated 4x4 blocks hence need to filter them out here.
*/
if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
/* not very accurate would need a popcount on the mask */
/* always count this not worth bothering? */
task->ps_invocations += 1 * variant->ps_inv_multiplier;
/* Propagate non-interpolated raster state. */
task->thread_data.raster_state.viewport_index = inputs->viewport_index;

View file

@ -507,15 +507,14 @@ end:
}
void lp_scene_begin_binning( struct lp_scene *scene,
struct pipe_framebuffer_state *fb, boolean discard )
void lp_scene_begin_binning(struct lp_scene *scene,
struct pipe_framebuffer_state *fb)
{
int i;
unsigned max_layer = ~0;
assert(lp_scene_is_empty(scene));
scene->discard = discard;
util_copy_framebuffer_state(&scene->fb, fb);
scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE;

View file

@ -166,7 +166,6 @@ struct lp_scene {
unsigned resource_reference_size;
boolean alloc_failed;
boolean discard;
/**
* Number of active tiles in each dimension.
* This basically the framebuffer size divided by tile size
@ -389,12 +388,11 @@ lp_scene_bin_iter_next( struct lp_scene *scene, int *x, int *y );
/* Begin/end binning of a scene
*/
void
lp_scene_begin_binning( struct lp_scene *scene,
struct pipe_framebuffer_state *fb,
boolean discard );
lp_scene_begin_binning(struct lp_scene *scene,
struct pipe_framebuffer_state *fb);
void
lp_scene_end_binning( struct lp_scene *scene );
lp_scene_end_binning(struct lp_scene *scene);
/* Begin/end rasterization of a scene
@ -403,7 +401,7 @@ void
lp_scene_begin_rasterization(struct lp_scene *scene);
void
lp_scene_end_rasterization(struct lp_scene *scene );
lp_scene_end_rasterization(struct lp_scene *scene);

View file

@ -82,7 +82,7 @@ lp_setup_get_empty_scene(struct lp_setup_context *setup)
lp_fence_wait(setup->scene->fence);
}
lp_scene_begin_binning(setup->scene, &setup->fb, setup->rasterizer_discard);
lp_scene_begin_binning(setup->scene, &setup->fb);
}
@ -724,25 +724,27 @@ lp_setup_set_scissors( struct lp_setup_context *setup,
void
lp_setup_set_flatshade_first( struct lp_setup_context *setup,
boolean flatshade_first )
lp_setup_set_flatshade_first(struct lp_setup_context *setup,
boolean flatshade_first)
{
setup->flatshade_first = flatshade_first;
}
void
lp_setup_set_rasterizer_discard( struct lp_setup_context *setup,
boolean rasterizer_discard )
lp_setup_set_rasterizer_discard(struct lp_setup_context *setup,
boolean rasterizer_discard)
{
if (setup->rasterizer_discard != rasterizer_discard) {
setup->rasterizer_discard = rasterizer_discard;
set_scene_state( setup, SETUP_FLUSHED, __FUNCTION__ );
setup->line = first_line;
setup->point = first_point;
setup->triangle = first_triangle;
}
}
void
lp_setup_set_vertex_info( struct lp_setup_context *setup,
struct vertex_info *vertex_info )
lp_setup_set_vertex_info(struct lp_setup_context *setup,
struct vertex_info *vertex_info)
{
/* XXX: just silently holding onto the pointer:
*/

View file

@ -616,8 +616,7 @@ try_setup_line( struct lp_setup_context *setup,
LP_COUNT(nr_tris);
if (lp_context->active_statistics_queries &&
!llvmpipe_rasterization_disabled(lp_context)) {
if (lp_context->active_statistics_queries) {
lp_context->pipeline_statistics.c_primitives++;
}
@ -759,24 +758,33 @@ try_setup_line( struct lp_setup_context *setup,
}
static void lp_setup_line( struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4] )
static void lp_setup_line_discard(struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4])
{
if (!try_setup_line( setup, v0, v1 ))
{
}
static void lp_setup_line(struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4])
{
if (!try_setup_line(setup, v0, v1)) {
if (!lp_setup_flush_and_restart(setup))
return;
if (!try_setup_line( setup, v0, v1 ))
if (!try_setup_line(setup, v0, v1))
return;
}
}
void lp_setup_choose_line( struct lp_setup_context *setup )
void lp_setup_choose_line(struct lp_setup_context *setup)
{
setup->line = lp_setup_line;
if (setup->rasterizer_discard) {
setup->line = lp_setup_line_discard;
} else {
setup->line = lp_setup_line;
}
}

View file

@ -458,8 +458,7 @@ try_setup_point( struct lp_setup_context *setup,
LP_COUNT(nr_tris);
if (lp_context->active_statistics_queries &&
!llvmpipe_rasterization_disabled(lp_context)) {
if (lp_context->active_statistics_queries) {
lp_context->pipeline_statistics.c_primitives++;
}
@ -518,24 +517,33 @@ try_setup_point( struct lp_setup_context *setup,
/*
 * No-op point handler: the body is intentionally empty, so the vertex
 * passed in is ignored and nothing is set up or binned.
 * lp_setup_choose_point() installs this as setup->point when
 * setup->rasterizer_discard is set.
 */
static void
lp_setup_point_discard(struct lp_setup_context *setup,
const float (*v0)[4])
{
}
static void
lp_setup_point(struct lp_setup_context *setup,
const float (*v0)[4])
{
if (!try_setup_point( setup, v0 ))
{
if (!try_setup_point(setup, v0)) {
if (!lp_setup_flush_and_restart(setup))
return;
if (!try_setup_point( setup, v0 ))
if (!try_setup_point(setup, v0))
return;
}
}
void
lp_setup_choose_point( struct lp_setup_context *setup )
lp_setup_choose_point(struct lp_setup_context *setup)
{
setup->point = lp_setup_point;
if (setup->rasterizer_discard) {
setup->point = lp_setup_point_discard;
} else {
setup->point = lp_setup_point;
}
}

View file

@ -1127,6 +1127,11 @@ static void triangle_cw(struct lp_setup_context *setup,
const float (*v2)[4])
{
PIPE_ALIGN_VAR(16) struct fixed_position position;
struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
if (lp_context->active_statistics_queries) {
lp_context->pipeline_statistics.c_primitives++;
}
calc_fixed_position(setup, &position, v0, v1, v2);
@ -1148,6 +1153,11 @@ static void triangle_ccw(struct lp_setup_context *setup,
const float (*v2)[4])
{
PIPE_ALIGN_VAR(16) struct fixed_position position;
struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
if (lp_context->active_statistics_queries) {
lp_context->pipeline_statistics.c_primitives++;
}
calc_fixed_position(setup, &position, v0, v1, v2);
@ -1166,8 +1176,7 @@ static void triangle_both(struct lp_setup_context *setup,
PIPE_ALIGN_VAR(16) struct fixed_position position;
struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
if (lp_context->active_statistics_queries &&
!llvmpipe_rasterization_disabled(lp_context)) {
if (lp_context->active_statistics_queries) {
lp_context->pipeline_statistics.c_primitives++;
}
@ -1196,17 +1205,21 @@ static void triangle_both(struct lp_setup_context *setup,
}
static void triangle_nop( struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4] )
/*
 * No-op triangle handler: the body is intentionally empty, so the three
 * vertices are ignored and nothing is set up or binned.
 * lp_setup_choose_triangle() installs this as setup->triangle when
 * setup->rasterizer_discard is set, and as the fallback for an
 * unrecognized cull mode (the switch's default case).
 */
static void triangle_noop(struct lp_setup_context *setup,
const float (*v0)[4],
const float (*v1)[4],
const float (*v2)[4])
{
}
void
lp_setup_choose_triangle( struct lp_setup_context *setup )
lp_setup_choose_triangle(struct lp_setup_context *setup)
{
if (setup->rasterizer_discard) {
setup->triangle = triangle_noop;
return;
}
switch (setup->cullmode) {
case PIPE_FACE_NONE:
setup->triangle = triangle_both;
@ -1218,7 +1231,7 @@ lp_setup_choose_triangle( struct lp_setup_context *setup )
setup->triangle = setup->ccw_is_frontface ? triangle_cw : triangle_ccw;
break;
default:
setup->triangle = triangle_nop;
setup->triangle = triangle_noop;
break;
}
}

View file

@ -571,7 +571,7 @@ lp_setup_pipeline_statistics(
stats->gs_invocations;
llvmpipe->pipeline_statistics.gs_primitives +=
stats->gs_primitives;
if (!llvmpipe_rasterization_disabled(llvmpipe)) {
if (!setup->rasterizer_discard) {
llvmpipe->pipeline_statistics.c_invocations +=
stats->c_invocations;
} else {

View file

@ -207,13 +207,27 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
LP_NEW_SAMPLER |
LP_NEW_SAMPLER_VIEW |
LP_NEW_OCCLUSION_QUERY))
llvmpipe_update_fs( llvmpipe );
llvmpipe_update_fs(llvmpipe);
if (llvmpipe->dirty & (LP_NEW_RASTERIZER)) {
if (llvmpipe->dirty & (LP_NEW_FS |
LP_NEW_FRAMEBUFFER |
LP_NEW_RASTERIZER |
LP_NEW_DEPTH_STENCIL_ALPHA)) {
/*
* Rasterization is disabled if there is no pixel shader and
* both depth and stencil testing are disabled:
* http://msdn.microsoft.com/en-us/library/windows/desktop/bb205125
* FIXME: set rasterizer_discard in state tracker instead.
*/
boolean null_fs = !llvmpipe->fs ||
llvmpipe->fs->info.base.num_instructions <= 1;
boolean discard =
(llvmpipe->sample_mask & 1) == 0 ||
(llvmpipe->rasterizer ? llvmpipe->rasterizer->rasterizer_discard : FALSE);
(llvmpipe->rasterizer ? llvmpipe->rasterizer->rasterizer_discard : FALSE) ||
(null_fs &&
!llvmpipe->depth_stencil->depth.enabled &&
!llvmpipe->depth_stencil->stencil[0].enabled);
lp_setup_set_rasterizer_discard(llvmpipe->setup, discard);
}

View file

@ -2554,6 +2554,25 @@ generate_fragment(struct llvmpipe_context *lp,
assert(builder);
LLVMPositionBuilderAtEnd(builder, block);
/*
* Must not count ps invocations if there's a null shader.
* (It would be ok to count with null shader if there's d/s tests,
* but only if there's d/s buffers too, which is different
* to implicit rasterization disable which must not depend
* on the d/s buffers.)
* Could use popcount on mask, but pixel accuracy is not required.
* Could disable if there's no stats query, but maybe not worth it.
*/
if (shader->info.base.num_instructions > 1) {
LLVMValueRef invocs, val;
invocs = lp_jit_thread_data_invocations(gallivm, thread_data_ptr);
val = LLVMBuildLoad(builder, invocs, "");
val = LLVMBuildAdd(builder, val,
LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 1, 0),
"invoc_count");
LLVMBuildStore(builder, val, invocs);
}
/* code generated texture sampling */
sampler = lp_llvm_sampler_soa_create(key->state);
@ -2843,14 +2862,6 @@ generate_variant(struct llvmpipe_context *lp,
!shader->info.base.writes_samplemask
? TRUE : FALSE;
/* if num_instructions == 1, it's a nop shader with only an END instruction */
if ((shader->info.base.num_instructions <= 1) &&
!key->depth.enabled && !key->stencil[0].enabled) {
variant->ps_inv_multiplier = 0;
} else {
variant->ps_inv_multiplier = 1;
}
if ((LP_DEBUG & DEBUG_FS) || (gallivm_debug & GALLIVM_DEBUG_IR)) {
lp_debug_fs_variant(variant);
}
@ -3471,18 +3482,4 @@ llvmpipe_init_fs_funcs(struct llvmpipe_context *llvmpipe)
llvmpipe->pipe.set_constant_buffer = llvmpipe_set_constant_buffer;
}
/*
* Rasterization is disabled if there is no pixel shader and
* both depth and stencil testing are disabled:
* http://msdn.microsoft.com/en-us/library/windows/desktop/bb205125
*/
boolean
llvmpipe_rasterization_disabled(struct llvmpipe_context *lp)
{
/* if num_instructions == 1, it's a nop shader with only an END instruction */
boolean null_fs = !lp->fs || lp->fs->info.base.num_instructions <= 1;
return (null_fs &&
!lp->depth_stencil->depth.enabled &&
!lp->depth_stencil->stencil[0].enabled);
}

View file

@ -98,7 +98,6 @@ struct lp_fragment_shader_variant
struct lp_fragment_shader_variant_key key;
boolean opaque;
uint8_t ps_inv_multiplier;
struct gallivm_state *gallivm;
@ -150,8 +149,4 @@ void
llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
struct lp_fragment_shader_variant *variant);
boolean
llvmpipe_rasterization_disabled(struct llvmpipe_context *lp);
#endif /* LP_STATE_FS_H_ */