mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-18 19:10:32 +01:00
radeonsi: implement shader-based culling for lines
This helps some viewperf subtests. Only view XY culling is done. Edgeflags are always disabled with lines.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13048>
This commit is contained in:
parent
e7e0b90c94
commit
f00d3e2909
8 changed files with 59 additions and 25 deletions
|
|
@ -52,7 +52,7 @@ struct ac_position_w_info {
|
|||
};
|
||||
|
||||
static void ac_analyze_position_w(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
||||
struct ac_position_w_info *w)
|
||||
struct ac_position_w_info *w, unsigned num_vertices)
|
||||
{
|
||||
LLVMBuilderRef builder = ctx->builder;
|
||||
LLVMValueRef all_w_negative = ctx->i1true;
|
||||
|
|
@ -60,7 +60,7 @@ static void ac_analyze_position_w(struct ac_llvm_context *ctx, LLVMValueRef pos[
|
|||
w->w_reflection = ctx->i1false;
|
||||
w->any_w_negative = ctx->i1false;
|
||||
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
for (unsigned i = 0; i < num_vertices; i++) {
|
||||
LLVMValueRef neg_w;
|
||||
|
||||
neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, "");
|
||||
|
|
@ -137,11 +137,14 @@ static void cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
|||
/* Compute the primitive bounding box for easy culling. */
|
||||
for (unsigned chan = 0; chan < (options->cull_view_near_z ||
|
||||
options->cull_view_far_z ? 3 : 2); chan++) {
|
||||
assert(options->num_vertices >= 2);
|
||||
bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
|
||||
bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
|
||||
|
||||
bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
|
||||
bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
|
||||
|
||||
if (options->num_vertices == 3) {
|
||||
bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
|
||||
bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
|
||||
}
|
||||
}
|
||||
|
||||
/* View culling. */
|
||||
|
|
@ -231,7 +234,7 @@ void ac_cull_primitive(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
|||
void *userdata)
|
||||
{
|
||||
struct ac_position_w_info w;
|
||||
ac_analyze_position_w(ctx, pos, &w);
|
||||
ac_analyze_position_w(ctx, pos, &w, options->num_vertices);
|
||||
|
||||
/* W culling. */
|
||||
LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true;
|
||||
|
|
|
|||
|
|
@ -46,6 +46,8 @@ struct ac_cull_options {
|
|||
bool cull_w; /* cull primitives with all W < 0 */
|
||||
|
||||
bool use_halfz_clip_space;
|
||||
|
||||
uint8_t num_vertices; /* 1..3 */
|
||||
};
|
||||
|
||||
/* Callback invoked in the inner-most branch where the primitive is accepted. */
|
||||
|
|
|
|||
|
|
@ -83,6 +83,9 @@ static LLVMValueRef ngg_get_vertices_per_prim(struct si_shader_context *ctx, uns
|
|||
/* Blits always use axis-aligned rectangles with 3 vertices. */
|
||||
*num_vertices = 3;
|
||||
return LLVMConstInt(ctx->ac.i32, 3, 0);
|
||||
} else if (ctx->shader->key.opt.ngg_culling & SI_NGG_CULL_LINES) {
|
||||
*num_vertices = 2;
|
||||
return LLVMConstInt(ctx->ac.i32, 2, 0);
|
||||
} else {
|
||||
/* We always build up all three indices for the prim export
|
||||
* independent of the primitive type. The additional garbage
|
||||
|
|
@ -994,13 +997,23 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
|
|||
|
||||
/* Execute culling code. */
|
||||
struct ac_cull_options options = {};
|
||||
options.cull_front = shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE;
|
||||
options.cull_back = shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE;
|
||||
options.cull_view_xy = true;
|
||||
options.cull_small_prims = true; /* this would only be false with conservative rasterization */
|
||||
options.cull_zero_area = options.cull_front || options.cull_back;
|
||||
options.cull_w = true;
|
||||
|
||||
if (shader->key.opt.ngg_culling & SI_NGG_CULL_LINES) {
|
||||
options.num_vertices = 2;
|
||||
|
||||
assert(!(shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE));
|
||||
assert(!(shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE));
|
||||
assert(!(shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL));
|
||||
} else {
|
||||
options.num_vertices = 3;
|
||||
options.cull_front = shader->key.opt.ngg_culling & SI_NGG_CULL_FRONT_FACE;
|
||||
options.cull_back = shader->key.opt.ngg_culling & SI_NGG_CULL_BACK_FACE;
|
||||
options.cull_small_prims = true; /* this would only be false with conservative rasterization */
|
||||
options.cull_zero_area = options.cull_front || options.cull_back;
|
||||
}
|
||||
|
||||
/* Tell ES threads whether their vertex survived. */
|
||||
LLVMValueRef params[] = {
|
||||
gs_accepted,
|
||||
|
|
@ -1995,7 +2008,7 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
|
|||
shader->previous_stage_sel ? shader->previous_stage_sel : gs_sel;
|
||||
const gl_shader_stage gs_stage = gs_sel->info.stage;
|
||||
const unsigned gs_num_invocations = MAX2(gs_sel->info.base.gs.invocations, 1);
|
||||
const unsigned input_prim = si_get_input_prim(gs_sel);
|
||||
const unsigned input_prim = si_get_input_prim(gs_sel, &shader->key);
|
||||
const bool use_adjacency =
|
||||
input_prim >= PIPE_PRIM_LINES_ADJACENCY && input_prim <= PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY;
|
||||
const unsigned max_verts_per_prim = u_vertices_per_prim(input_prim);
|
||||
|
|
|
|||
|
|
@ -1819,6 +1819,12 @@ static inline unsigned si_get_total_colormask(struct si_context *sctx)
|
|||
((1 << PIPE_PRIM_LINES) | (1 << PIPE_PRIM_LINE_LOOP) | (1 << PIPE_PRIM_LINE_STRIP) | \
|
||||
(1 << PIPE_PRIM_LINES_ADJACENCY) | (1 << PIPE_PRIM_LINE_STRIP_ADJACENCY))
|
||||
|
||||
#define UTIL_ALL_PRIM_TRIANGLE_MODES \
|
||||
((1 << PIPE_PRIM_TRIANGLES) | (1 << PIPE_PRIM_TRIANGLE_STRIP) | \
|
||||
(1 << PIPE_PRIM_TRIANGLE_FAN) | (1 << PIPE_PRIM_QUADS) | (1 << PIPE_PRIM_QUAD_STRIP) | \
|
||||
(1 << PIPE_PRIM_POLYGON) | (1 << PIPE_PRIM_TRIANGLES_ADJACENCY) | \
|
||||
(1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY))
|
||||
|
||||
static inline bool util_prim_is_lines(unsigned prim)
|
||||
{
|
||||
return ((1 << prim) & UTIL_ALL_PRIM_LINE_MODES) != 0;
|
||||
|
|
@ -1831,11 +1837,12 @@ static inline bool util_prim_is_points_or_lines(unsigned prim)
|
|||
|
||||
static inline bool util_rast_prim_is_triangles(unsigned prim)
|
||||
{
|
||||
return ((1 << prim) &
|
||||
((1 << PIPE_PRIM_TRIANGLES) | (1 << PIPE_PRIM_TRIANGLE_STRIP) |
|
||||
(1 << PIPE_PRIM_TRIANGLE_FAN) | (1 << PIPE_PRIM_QUADS) | (1 << PIPE_PRIM_QUAD_STRIP) |
|
||||
(1 << PIPE_PRIM_POLYGON) | (1 << PIPE_PRIM_TRIANGLES_ADJACENCY) |
|
||||
(1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY)));
|
||||
return ((1 << prim) & UTIL_ALL_PRIM_TRIANGLE_MODES) != 0;
|
||||
}
|
||||
|
||||
static inline bool util_rast_prim_is_lines_or_triangles(unsigned prim)
|
||||
{
|
||||
return ((1 << prim) & (UTIL_ALL_PRIM_LINE_MODES | UTIL_ALL_PRIM_TRIANGLE_MODES)) != 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -287,6 +287,7 @@ enum
|
|||
#define SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(x) (((x) & 0x3) << 5) /* 0->0, 1->1, 2->2, 3->4 */
|
||||
#define SI_GET_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(x) (((x) >> 5) & 0x3)
|
||||
#define SI_NGG_CULL_GS_FAST_LAUNCH_ALL (0xf << 3) /* GS fast launch (both prim types) */
|
||||
#define SI_NGG_CULL_LINES (1 << 7) /* the primitive type is lines */
|
||||
|
||||
/**
|
||||
* For VS shader keys, describe any fixups required for vertex fetch.
|
||||
|
|
@ -685,7 +686,7 @@ struct si_shader_key {
|
|||
unsigned kill_pointsize : 1;
|
||||
|
||||
/* For NGG VS and TES. */
|
||||
unsigned ngg_culling : 7; /* SI_NGG_CULL_* */
|
||||
unsigned ngg_culling : 8; /* SI_NGG_CULL_* */
|
||||
|
||||
/* For shaders where monolithic variants have better code.
|
||||
*
|
||||
|
|
@ -963,7 +964,8 @@ static inline bool si_shader_uses_bindless_images(struct si_shader_selector *sel
|
|||
static inline bool gfx10_edgeflags_have_effect(struct si_shader *shader)
|
||||
{
|
||||
if (shader->selector->info.stage == MESA_SHADER_VERTEX &&
|
||||
!shader->selector->info.base.vs.blit_sgprs_amd)
|
||||
!shader->selector->info.base.vs.blit_sgprs_amd &&
|
||||
!(shader->key.opt.ngg_culling & SI_NGG_CULL_LINES))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -587,7 +587,7 @@ void si_get_vs_key_inputs(struct si_context *sctx, struct si_shader_key *key,
|
|||
void si_update_ps_inputs_read_or_disabled(struct si_context *sctx);
|
||||
void si_update_ps_kill_enable(struct si_context *sctx);
|
||||
void si_update_vrs_flat_shading(struct si_context *sctx);
|
||||
unsigned si_get_input_prim(const struct si_shader_selector *gs);
|
||||
unsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_shader_key *key);
|
||||
bool si_update_ngg(struct si_context *sctx);
|
||||
void si_ps_key_update_framebuffer(struct si_context *sctx);
|
||||
void si_ps_key_update_framebuffer_blend(struct si_context *sctx);
|
||||
|
|
|
|||
|
|
@ -2157,8 +2157,8 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
|
||||
if (NGG && !HAS_GS &&
|
||||
/* Tessellation sets ngg_cull_vert_threshold to UINT_MAX if the prim type
|
||||
* is not triangles, so this check is only needed without tessellation. */
|
||||
(HAS_TESS || sctx->current_rast_prim == PIPE_PRIM_TRIANGLES) &&
|
||||
* is points, so this check is only needed without tessellation. */
|
||||
(HAS_TESS || util_rast_prim_is_lines_or_triangles(sctx->current_rast_prim)) &&
|
||||
/* Only the first draw for a shader starts with culling disabled and it's disabled
|
||||
* until we pass the total_direct_count check and then it stays enabled until
|
||||
* the shader is changed. This eliminates most culling on/off state changes. */
|
||||
|
|
@ -2170,6 +2170,11 @@ static void si_draw_vbo(struct pipe_context *ctx,
|
|||
rs->ngg_cull_flags;
|
||||
assert(ngg_culling); /* rasterizer state should always set this to non-zero */
|
||||
|
||||
if (util_prim_is_lines(sctx->current_rast_prim)) {
|
||||
/* Overwrite it to mask out face cull flags. */
|
||||
ngg_culling = SI_NGG_CULL_ENABLED | SI_NGG_CULL_LINES;
|
||||
}
|
||||
|
||||
/* Use NGG fast launch for certain primitive types.
|
||||
* A draw must have at least 1 full primitive.
|
||||
* The fast launch doesn't work with tessellation.
|
||||
|
|
|
|||
|
|
@ -1094,7 +1094,7 @@ static void gfx10_emit_shader_ngg_tess_gs(struct si_context *sctx)
|
|||
gfx10_emit_shader_ngg_tail(sctx, shader);
|
||||
}
|
||||
|
||||
unsigned si_get_input_prim(const struct si_shader_selector *gs)
|
||||
unsigned si_get_input_prim(const struct si_shader_selector *gs, const struct si_shader_key *key)
|
||||
{
|
||||
if (gs->info.stage == MESA_SHADER_GEOMETRY)
|
||||
return gs->info.base.gs.input_primitive;
|
||||
|
|
@ -1107,7 +1107,9 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs)
|
|||
return PIPE_PRIM_TRIANGLES;
|
||||
}
|
||||
|
||||
/* TODO: Set this correctly if the primitive type is set in the shader key. */
|
||||
if (key->opt.ngg_culling & SI_NGG_CULL_LINES)
|
||||
return PIPE_PRIM_LINES;
|
||||
|
||||
return PIPE_PRIM_TRIANGLES; /* worst case for all callers */
|
||||
}
|
||||
|
||||
|
|
@ -1151,7 +1153,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
gs_info->base.vs.window_space_position : 0;
|
||||
bool es_enable_prim_id = shader->key.mono.u.vs_export_prim_id || es_info->uses_primid;
|
||||
unsigned gs_num_invocations = MAX2(gs_sel->info.base.gs.invocations, 1);
|
||||
unsigned input_prim = si_get_input_prim(gs_sel);
|
||||
unsigned input_prim = si_get_input_prim(gs_sel, &shader->key);
|
||||
bool break_wave_at_eoi = false;
|
||||
struct si_pm4_state *pm4 = si_get_shader_pm4_state(shader);
|
||||
if (!pm4)
|
||||
|
|
@ -2987,7 +2989,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
sel->ngg_cull_vert_threshold = 128;
|
||||
}
|
||||
} else if (sel->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
if (sel->rast_prim == PIPE_PRIM_TRIANGLES &&
|
||||
if (sel->rast_prim != PIPE_PRIM_POINTS &&
|
||||
(sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL) ||
|
||||
sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_TESS) ||
|
||||
sscreen->info.chip_class == GFX10_3))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue